{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7138213662031077, "eval_steps": 500, "global_step": 7000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "loss_breakdown/lm_loss": 3.621316432952881, "loss_breakdown/pointer_loss": 2.2621192932128906, "step": 0 }, { "epoch": 0, "loss_breakdown/lm_loss": 4.161348819732666, "loss_breakdown/pointer_loss": 1.7895783185958862, "step": 0 }, { "epoch": 0, "loss_breakdown/lm_loss": 4.193967819213867, "loss_breakdown/pointer_loss": 1.1925519704818726, "step": 0 }, { "epoch": 0, "loss_breakdown/lm_loss": 4.43925142288208, "loss_breakdown/pointer_loss": 1.7150626182556152, "step": 0 }, { "epoch": 0, "loss_breakdown/lm_loss": 4.393280982971191, "loss_breakdown/pointer_loss": 1.3689972162246704, "step": 0 }, { "epoch": 0, "loss_breakdown/lm_loss": 4.243700981140137, "loss_breakdown/pointer_loss": 0.7171953320503235, "step": 0 }, { "epoch": 0, "loss_breakdown/lm_loss": 4.811887741088867, "loss_breakdown/pointer_loss": 0.8133865594863892, "step": 0 }, { "epoch": 0, "loss_breakdown/lm_loss": 4.995602607727051, "loss_breakdown/pointer_loss": 1.5957460403442383, "step": 0 }, { "epoch": 0.0010197448088615825, "grad_norm": 251.8531306140226, "learning_rate": 4.587155963302753e-08, "loss": 7.1689, "step": 10 }, { "epoch": 0.0010197448088615825, "loss_breakdown/lm_loss": 7.078155040740967, "loss_breakdown/pointer_loss": 0.7451821565628052, "step": 10 }, { "epoch": 0.0010197448088615825, "loss_breakdown/lm_loss": 9.61131763458252, "loss_breakdown/pointer_loss": 0.3190159499645233, "step": 10 }, { "epoch": 0.0010197448088615825, "loss_breakdown/lm_loss": 6.596867561340332, "loss_breakdown/pointer_loss": 0.6196718215942383, "step": 10 }, { "epoch": 0.0010197448088615825, "loss_breakdown/lm_loss": 6.645041465759277, "loss_breakdown/pointer_loss": 1.1706340312957764, "step": 10 }, { "epoch": 0.0010197448088615825, "loss_breakdown/lm_loss": 11.068497657775879, "loss_breakdown/pointer_loss": 0.38412001729011536, "step": 10 }, { "epoch": 0.0010197448088615825, "loss_breakdown/lm_loss": 10.921542167663574, "loss_breakdown/pointer_loss": 2.028818130493164, "step": 10 }, { "epoch": 0.0010197448088615825, "loss_breakdown/lm_loss": 10.850740432739258, "loss_breakdown/pointer_loss": 0.47888627648353577, "step": 10 }, { "epoch": 0.0010197448088615825, "loss_breakdown/lm_loss": 11.47044849395752, "loss_breakdown/pointer_loss": 0.216935932636261, "step": 10 }, { "epoch": 0.002039489617723165, "grad_norm": 176.6631483309473, "learning_rate": 9.683995922528033e-08, "loss": 8.2169, "step": 20 }, { "epoch": 0.002039489617723165, "loss_breakdown/lm_loss": 5.127109050750732, "loss_breakdown/pointer_loss": 1.3047711849212646, "step": 20 }, { "epoch": 0.002039489617723165, "loss_breakdown/lm_loss": 5.198714256286621, "loss_breakdown/pointer_loss": 0.9247815012931824, "step": 20 }, { "epoch": 0.002039489617723165, "loss_breakdown/lm_loss": 5.410231113433838, "loss_breakdown/pointer_loss": 0.6898661851882935, "step": 20 }, { "epoch": 0.002039489617723165, "loss_breakdown/lm_loss": 5.328085899353027, "loss_breakdown/pointer_loss": 0.5360652208328247, "step": 20 }, { "epoch": 0.002039489617723165, "loss_breakdown/lm_loss": 4.546687602996826, "loss_breakdown/pointer_loss": 0.7950313687324524, "step": 20 }, { "epoch": 0.002039489617723165, "loss_breakdown/lm_loss": 5.4591264724731445, "loss_breakdown/pointer_loss": 0.5612685680389404, "step": 20 }, { "epoch": 0.002039489617723165, "loss_breakdown/lm_loss": 4.468176364898682, "loss_breakdown/pointer_loss": 0.5124301314353943, "step": 20 }, { "epoch": 0.002039489617723165, "loss_breakdown/lm_loss": 4.826949119567871, "loss_breakdown/pointer_loss": 1.084397315979004, "step": 20 }, { "epoch": 0.003059234426584747, "grad_norm": 410.12328028720935, "learning_rate": 1.4780835881753314e-07, "loss": 7.557, "step": 30 }, { "epoch": 0.003059234426584747, "loss_breakdown/lm_loss": 10.641229629516602, "loss_breakdown/pointer_loss": 0.787895679473877, "step": 30 }, { "epoch": 0.003059234426584747, "loss_breakdown/lm_loss": 10.810383796691895, "loss_breakdown/pointer_loss": 1.1552166938781738, "step": 30 }, { "epoch": 0.003059234426584747, "loss_breakdown/lm_loss": 10.897473335266113, "loss_breakdown/pointer_loss": 0.44021326303482056, "step": 30 }, { "epoch": 0.003059234426584747, "loss_breakdown/lm_loss": 11.244626998901367, "loss_breakdown/pointer_loss": 0.9907925128936768, "step": 30 }, { "epoch": 0.003059234426584747, "loss_breakdown/lm_loss": 10.7892484664917, "loss_breakdown/pointer_loss": 0.1421506255865097, "step": 30 }, { "epoch": 0.003059234426584747, "loss_breakdown/lm_loss": 10.770855903625488, "loss_breakdown/pointer_loss": 0.1858416646718979, "step": 30 }, { "epoch": 0.003059234426584747, "loss_breakdown/lm_loss": 11.321913719177246, "loss_breakdown/pointer_loss": 2.525510311126709, "step": 30 }, { "epoch": 0.003059234426584747, "loss_breakdown/lm_loss": 10.42403793334961, "loss_breakdown/pointer_loss": 0.14007282257080078, "step": 30 }, { "epoch": 0.00407897923544633, "grad_norm": 177.24171674975088, "learning_rate": 1.9877675840978594e-07, "loss": 7.4065, "step": 40 }, { "epoch": 0.00407897923544633, "loss_breakdown/lm_loss": 4.636320114135742, "loss_breakdown/pointer_loss": 0.8786600828170776, "step": 40 }, { "epoch": 0.00407897923544633, "loss_breakdown/lm_loss": 5.424086570739746, "loss_breakdown/pointer_loss": 0.4183638095855713, "step": 40 }, { "epoch": 0.00407897923544633, "loss_breakdown/lm_loss": 4.761468887329102, "loss_breakdown/pointer_loss": 1.116649866104126, "step": 40 }, { "epoch": 0.00407897923544633, "loss_breakdown/lm_loss": 5.577417850494385, "loss_breakdown/pointer_loss": 0.8212469220161438, "step": 40 }, { "epoch": 0.00407897923544633, "loss_breakdown/lm_loss": 5.9191484451293945, "loss_breakdown/pointer_loss": 0.9190829992294312, "step": 40 }, { "epoch": 0.00407897923544633, "loss_breakdown/lm_loss": 5.362215995788574, "loss_breakdown/pointer_loss": 0.6789848804473877, "step": 40 }, { "epoch": 0.00407897923544633, "loss_breakdown/lm_loss": 4.950235843658447, "loss_breakdown/pointer_loss": 0.4192552864551544, "step": 40 }, { "epoch": 0.00407897923544633, "loss_breakdown/lm_loss": 7.140505313873291, "loss_breakdown/pointer_loss": 0.6684707403182983, "step": 40 }, { "epoch": 0.005098724044307912, "grad_norm": 537.2754710305369, "learning_rate": 2.497451580020388e-07, "loss": 7.2861, "step": 50 }, { "epoch": 0.005098724044307912, "loss_breakdown/lm_loss": 3.3454556465148926, "loss_breakdown/pointer_loss": 2.154041290283203, "step": 50 }, { "epoch": 0.005098724044307912, "loss_breakdown/lm_loss": 3.922489643096924, "loss_breakdown/pointer_loss": 0.9447371959686279, "step": 50 }, { "epoch": 0.005098724044307912, "loss_breakdown/lm_loss": 3.694833517074585, "loss_breakdown/pointer_loss": 0.811421275138855, "step": 50 }, { "epoch": 0.005098724044307912, "loss_breakdown/lm_loss": 3.830371141433716, "loss_breakdown/pointer_loss": 1.0707495212554932, "step": 50 }, { "epoch": 0.005098724044307912, "loss_breakdown/lm_loss": 4.016317844390869, "loss_breakdown/pointer_loss": 1.2089232206344604, "step": 50 }, { "epoch": 0.005098724044307912, "loss_breakdown/lm_loss": 4.249221324920654, "loss_breakdown/pointer_loss": 0.8842304944992065, "step": 50 }, { "epoch": 0.005098724044307912, "loss_breakdown/lm_loss": 4.061791896820068, "loss_breakdown/pointer_loss": 1.195582628250122, "step": 50 }, { "epoch": 0.005098724044307912, "loss_breakdown/lm_loss": 4.152310371398926, "loss_breakdown/pointer_loss": 0.47476622462272644, "step": 50 }, { "epoch": 0.006118468853169494, "grad_norm": 300.8680248607358, "learning_rate": 3.0071355759429153e-07, "loss": 5.6198, "step": 60 }, { "epoch": 0.006118468853169494, "loss_breakdown/lm_loss": 4.926963806152344, "loss_breakdown/pointer_loss": 0.736196756362915, "step": 60 }, { "epoch": 0.006118468853169494, "loss_breakdown/lm_loss": 5.090798377990723, "loss_breakdown/pointer_loss": 0.3974386751651764, "step": 60 }, { "epoch": 0.006118468853169494, "loss_breakdown/lm_loss": 7.90191650390625, "loss_breakdown/pointer_loss": 0.3696476221084595, "step": 60 }, { "epoch": 0.006118468853169494, "loss_breakdown/lm_loss": 5.083974838256836, "loss_breakdown/pointer_loss": 2.1740503311157227, "step": 60 }, { "epoch": 0.006118468853169494, "loss_breakdown/lm_loss": 5.246724605560303, "loss_breakdown/pointer_loss": 0.407073050737381, "step": 60 }, { "epoch": 0.006118468853169494, "loss_breakdown/lm_loss": 7.795088768005371, "loss_breakdown/pointer_loss": 0.5333921313285828, "step": 60 }, { "epoch": 0.006118468853169494, "loss_breakdown/lm_loss": 7.310258865356445, "loss_breakdown/pointer_loss": 0.46332094073295593, "step": 60 }, { "epoch": 0.006118468853169494, "loss_breakdown/lm_loss": 7.295444011688232, "loss_breakdown/pointer_loss": 0.33424001932144165, "step": 60 }, { "epoch": 0.007138213662031077, "grad_norm": 90.04204553807357, "learning_rate": 3.516819571865444e-07, "loss": 5.6525, "step": 70 }, { "epoch": 0.007138213662031077, "loss_breakdown/lm_loss": 3.105755567550659, "loss_breakdown/pointer_loss": 0.3788454532623291, "step": 70 }, { "epoch": 0.007138213662031077, "loss_breakdown/lm_loss": 3.288827657699585, "loss_breakdown/pointer_loss": 0.443735808134079, "step": 70 }, { "epoch": 0.007138213662031077, "loss_breakdown/lm_loss": 4.225527286529541, "loss_breakdown/pointer_loss": 0.8985816240310669, "step": 70 }, { "epoch": 0.007138213662031077, "loss_breakdown/lm_loss": 3.0917739868164062, "loss_breakdown/pointer_loss": 2.049051284790039, "step": 70 }, { "epoch": 0.007138213662031077, "loss_breakdown/lm_loss": 3.3278586864471436, "loss_breakdown/pointer_loss": 0.6660740375518799, "step": 70 }, { "epoch": 0.007138213662031077, "loss_breakdown/lm_loss": 3.375443458557129, "loss_breakdown/pointer_loss": 0.6418499946594238, "step": 70 }, { "epoch": 0.007138213662031077, "loss_breakdown/lm_loss": 3.417201042175293, "loss_breakdown/pointer_loss": 0.6161723136901855, "step": 70 }, { "epoch": 0.007138213662031077, "loss_breakdown/lm_loss": 3.1409215927124023, "loss_breakdown/pointer_loss": 1.407525897026062, "step": 70 }, { "epoch": 0.00815795847089266, "grad_norm": 155.54778132900844, "learning_rate": 4.026503567787972e-07, "loss": 4.5059, "step": 80 }, { "epoch": 0.00815795847089266, "loss_breakdown/lm_loss": 5.257227420806885, "loss_breakdown/pointer_loss": 0.07656662911176682, "step": 80 }, { "epoch": 0.00815795847089266, "loss_breakdown/lm_loss": 4.989464282989502, "loss_breakdown/pointer_loss": 0.10317564010620117, "step": 80 }, { "epoch": 0.00815795847089266, "loss_breakdown/lm_loss": 5.135498523712158, "loss_breakdown/pointer_loss": 3.06750226020813, "step": 80 }, { "epoch": 0.00815795847089266, "loss_breakdown/lm_loss": 5.091057777404785, "loss_breakdown/pointer_loss": 0.28640466928482056, "step": 80 }, { "epoch": 0.00815795847089266, "loss_breakdown/lm_loss": 5.562379837036133, "loss_breakdown/pointer_loss": 0.37002140283584595, "step": 80 }, { "epoch": 0.00815795847089266, "loss_breakdown/lm_loss": 5.15165376663208, "loss_breakdown/pointer_loss": 0.1747702658176422, "step": 80 }, { "epoch": 0.00815795847089266, "loss_breakdown/lm_loss": 5.015064716339111, "loss_breakdown/pointer_loss": 0.2591073513031006, "step": 80 }, { "epoch": 0.00815795847089266, "loss_breakdown/lm_loss": 5.186559200286865, "loss_breakdown/pointer_loss": 0.13410735130310059, "step": 80 }, { "epoch": 0.009177703279754241, "grad_norm": 41.49224871201546, "learning_rate": 4.5361875637105e-07, "loss": 3.7493, "step": 90 }, { "epoch": 0.009177703279754241, "loss_breakdown/lm_loss": 2.38481068611145, "loss_breakdown/pointer_loss": 0.7123165130615234, "step": 90 }, { "epoch": 0.009177703279754241, "loss_breakdown/lm_loss": 2.3609459400177, "loss_breakdown/pointer_loss": 0.570091724395752, "step": 90 }, { "epoch": 0.009177703279754241, "loss_breakdown/lm_loss": 2.375513792037964, "loss_breakdown/pointer_loss": 0.9337683916091919, "step": 90 }, { "epoch": 0.009177703279754241, "loss_breakdown/lm_loss": 2.4389140605926514, "loss_breakdown/pointer_loss": 1.3545399904251099, "step": 90 }, { "epoch": 0.009177703279754241, "loss_breakdown/lm_loss": 2.4382598400115967, "loss_breakdown/pointer_loss": 0.9071243405342102, "step": 90 }, { "epoch": 0.009177703279754241, "loss_breakdown/lm_loss": 2.7851061820983887, "loss_breakdown/pointer_loss": 0.9980993866920471, "step": 90 }, { "epoch": 0.009177703279754241, "loss_breakdown/lm_loss": 2.718585968017578, "loss_breakdown/pointer_loss": 0.6905503273010254, "step": 90 }, { "epoch": 0.009177703279754241, "loss_breakdown/lm_loss": 2.969038963317871, "loss_breakdown/pointer_loss": 1.1813417673110962, "step": 90 }, { "epoch": 0.010197448088615824, "grad_norm": 59.6419351954722, "learning_rate": 5.045871559633028e-07, "loss": 3.4989, "step": 100 }, { "epoch": 0.010197448088615824, "loss_breakdown/lm_loss": 1.8799704313278198, "loss_breakdown/pointer_loss": 2.6309974193573, "step": 100 }, { "epoch": 0.010197448088615824, "loss_breakdown/lm_loss": 1.8846296072006226, "loss_breakdown/pointer_loss": 1.7250163555145264, "step": 100 }, { "epoch": 0.010197448088615824, "loss_breakdown/lm_loss": 1.8922418355941772, "loss_breakdown/pointer_loss": 1.2737928628921509, "step": 100 }, { "epoch": 0.010197448088615824, "loss_breakdown/lm_loss": 1.9260472059249878, "loss_breakdown/pointer_loss": 1.3011733293533325, "step": 100 }, { "epoch": 0.010197448088615824, "loss_breakdown/lm_loss": 1.911391019821167, "loss_breakdown/pointer_loss": 1.1152949333190918, "step": 100 }, { "epoch": 0.010197448088615824, "loss_breakdown/lm_loss": 1.927060604095459, "loss_breakdown/pointer_loss": 1.1031395196914673, "step": 100 }, { "epoch": 0.010197448088615824, "loss_breakdown/lm_loss": 1.9493118524551392, "loss_breakdown/pointer_loss": 0.6602797508239746, "step": 100 }, { "epoch": 0.010197448088615824, "loss_breakdown/lm_loss": 1.9367471933364868, "loss_breakdown/pointer_loss": 0.5347182154655457, "step": 100 }, { "epoch": 0.011217192897477406, "grad_norm": 84.35858503010405, "learning_rate": 5.555555555555555e-07, "loss": 2.6382, "step": 110 }, { "epoch": 0.011217192897477406, "loss_breakdown/lm_loss": 1.981368899345398, "loss_breakdown/pointer_loss": 0.29690995812416077, "step": 110 }, { "epoch": 0.011217192897477406, "loss_breakdown/lm_loss": 1.9368324279785156, "loss_breakdown/pointer_loss": 0.6688476800918579, "step": 110 }, { "epoch": 0.011217192897477406, "loss_breakdown/lm_loss": 1.9903862476348877, "loss_breakdown/pointer_loss": 0.7688241004943848, "step": 110 }, { "epoch": 0.011217192897477406, "loss_breakdown/lm_loss": 2.104680299758911, "loss_breakdown/pointer_loss": 1.9366259574890137, "step": 110 }, { "epoch": 0.011217192897477406, "loss_breakdown/lm_loss": 2.271221399307251, "loss_breakdown/pointer_loss": 0.3832976818084717, "step": 110 }, { "epoch": 0.011217192897477406, "loss_breakdown/lm_loss": 2.2657949924468994, "loss_breakdown/pointer_loss": 0.6736089587211609, "step": 110 }, { "epoch": 0.011217192897477406, "loss_breakdown/lm_loss": 2.984160900115967, "loss_breakdown/pointer_loss": 0.4771953821182251, "step": 110 }, { "epoch": 0.011217192897477406, "loss_breakdown/lm_loss": 2.572535991668701, "loss_breakdown/pointer_loss": 1.8616753816604614, "step": 110 }, { "epoch": 0.012236937706338989, "grad_norm": 32.131177323210494, "learning_rate": 6.065239551478084e-07, "loss": 2.4177, "step": 120 }, { "epoch": 0.012236937706338989, "loss_breakdown/lm_loss": 1.09473717212677, "loss_breakdown/pointer_loss": 1.0094702243804932, "step": 120 }, { "epoch": 0.012236937706338989, "loss_breakdown/lm_loss": 1.1102298498153687, "loss_breakdown/pointer_loss": 1.099741816520691, "step": 120 }, { "epoch": 0.012236937706338989, "loss_breakdown/lm_loss": 1.103190541267395, "loss_breakdown/pointer_loss": 0.7522318363189697, "step": 120 }, { "epoch": 0.012236937706338989, "loss_breakdown/lm_loss": 1.1906945705413818, "loss_breakdown/pointer_loss": 0.4468645751476288, "step": 120 }, { "epoch": 0.012236937706338989, "loss_breakdown/lm_loss": 1.176520824432373, "loss_breakdown/pointer_loss": 0.8391753435134888, "step": 120 }, { "epoch": 0.012236937706338989, "loss_breakdown/lm_loss": 1.1782389879226685, "loss_breakdown/pointer_loss": 0.6060844659805298, "step": 120 }, { "epoch": 0.012236937706338989, "loss_breakdown/lm_loss": 1.1556634902954102, "loss_breakdown/pointer_loss": 0.49356797337532043, "step": 120 }, { "epoch": 0.012236937706338989, "loss_breakdown/lm_loss": 1.2217196226119995, "loss_breakdown/pointer_loss": 0.438062846660614, "step": 120 }, { "epoch": 0.01325668251520057, "grad_norm": 41.12525321567167, "learning_rate": 6.574923547400612e-07, "loss": 1.8924, "step": 130 }, { "epoch": 0.01325668251520057, "loss_breakdown/lm_loss": 2.055011034011841, "loss_breakdown/pointer_loss": 0.3551696538925171, "step": 130 }, { "epoch": 0.01325668251520057, "loss_breakdown/lm_loss": 1.9856034517288208, "loss_breakdown/pointer_loss": 2.0056509971618652, "step": 130 }, { "epoch": 0.01325668251520057, "loss_breakdown/lm_loss": 2.094945192337036, "loss_breakdown/pointer_loss": 0.13370761275291443, "step": 130 }, { "epoch": 0.01325668251520057, "loss_breakdown/lm_loss": 2.0342767238616943, "loss_breakdown/pointer_loss": 0.23325546085834503, "step": 130 }, { "epoch": 0.01325668251520057, "loss_breakdown/lm_loss": 2.008957624435425, "loss_breakdown/pointer_loss": 0.1280471682548523, "step": 130 }, { "epoch": 0.01325668251520057, "loss_breakdown/lm_loss": 1.988806962966919, "loss_breakdown/pointer_loss": 0.15311449766159058, "step": 130 }, { "epoch": 0.01325668251520057, "loss_breakdown/lm_loss": 2.0141892433166504, "loss_breakdown/pointer_loss": 0.310552179813385, "step": 130 }, { "epoch": 0.01325668251520057, "loss_breakdown/lm_loss": 2.0412392616271973, "loss_breakdown/pointer_loss": 0.5493862628936768, "step": 130 }, { "epoch": 0.014276427324062153, "grad_norm": 21.896923899949215, "learning_rate": 7.084607543323139e-07, "loss": 1.4617, "step": 140 }, { "epoch": 0.014276427324062153, "loss_breakdown/lm_loss": 0.39815422892570496, "loss_breakdown/pointer_loss": 0.7887144088745117, "step": 140 }, { "epoch": 0.014276427324062153, "loss_breakdown/lm_loss": 0.6676346659660339, "loss_breakdown/pointer_loss": 0.39136895537376404, "step": 140 }, { "epoch": 0.014276427324062153, "loss_breakdown/lm_loss": 0.43645748496055603, "loss_breakdown/pointer_loss": 0.6833735108375549, "step": 140 }, { "epoch": 0.014276427324062153, "loss_breakdown/lm_loss": 0.44846946001052856, "loss_breakdown/pointer_loss": 1.382899522781372, "step": 140 }, { "epoch": 0.014276427324062153, "loss_breakdown/lm_loss": 0.45944103598594666, "loss_breakdown/pointer_loss": 0.3436807692050934, "step": 140 }, { "epoch": 0.014276427324062153, "loss_breakdown/lm_loss": 0.7238070964813232, "loss_breakdown/pointer_loss": 0.5381367802619934, "step": 140 }, { "epoch": 0.014276427324062153, "loss_breakdown/lm_loss": 0.6525053381919861, "loss_breakdown/pointer_loss": 2.545036792755127, "step": 140 }, { "epoch": 0.014276427324062153, "loss_breakdown/lm_loss": 0.6580550670623779, "loss_breakdown/pointer_loss": 0.6676253080368042, "step": 140 }, { "epoch": 0.015296172132923736, "grad_norm": 41.72068573731863, "learning_rate": 7.594291539245668e-07, "loss": 1.0999, "step": 150 }, { "epoch": 0.015296172132923736, "loss_breakdown/lm_loss": 0.11425139009952545, "loss_breakdown/pointer_loss": 2.6071689128875732, "step": 150 }, { "epoch": 0.015296172132923736, "loss_breakdown/lm_loss": 0.10148165374994278, "loss_breakdown/pointer_loss": 1.0339696407318115, "step": 150 }, { "epoch": 0.015296172132923736, "loss_breakdown/lm_loss": 0.13686515390872955, "loss_breakdown/pointer_loss": 1.0501896142959595, "step": 150 }, { "epoch": 0.015296172132923736, "loss_breakdown/lm_loss": 0.11132795363664627, "loss_breakdown/pointer_loss": 1.592862606048584, "step": 150 }, { "epoch": 0.015296172132923736, "loss_breakdown/lm_loss": 0.1206415519118309, "loss_breakdown/pointer_loss": 0.8244308233261108, "step": 150 }, { "epoch": 0.015296172132923736, "loss_breakdown/lm_loss": 0.12266389280557632, "loss_breakdown/pointer_loss": 0.426207572221756, "step": 150 }, { "epoch": 0.015296172132923736, "loss_breakdown/lm_loss": 0.13524538278579712, "loss_breakdown/pointer_loss": 0.4658691883087158, "step": 150 }, { "epoch": 0.015296172132923736, "loss_breakdown/lm_loss": 0.12857991456985474, "loss_breakdown/pointer_loss": 0.6754746437072754, "step": 150 }, { "epoch": 0.01631591694178532, "grad_norm": 16.8393556478719, "learning_rate": 8.103975535168196e-07, "loss": 0.692, "step": 160 }, { "epoch": 0.01631591694178532, "loss_breakdown/lm_loss": 0.18504884839057922, "loss_breakdown/pointer_loss": 0.41571158170700073, "step": 160 }, { "epoch": 0.01631591694178532, "loss_breakdown/lm_loss": 0.21067380905151367, "loss_breakdown/pointer_loss": 0.35413676500320435, "step": 160 }, { "epoch": 0.01631591694178532, "loss_breakdown/lm_loss": 0.48012760281562805, "loss_breakdown/pointer_loss": 0.3637324571609497, "step": 160 }, { "epoch": 0.01631591694178532, "loss_breakdown/lm_loss": 0.48318764567375183, "loss_breakdown/pointer_loss": 0.9940458536148071, "step": 160 }, { "epoch": 0.01631591694178532, "loss_breakdown/lm_loss": 0.22012346982955933, "loss_breakdown/pointer_loss": 0.33733028173446655, "step": 160 }, { "epoch": 0.01631591694178532, "loss_breakdown/lm_loss": 0.4717722535133362, "loss_breakdown/pointer_loss": 0.26681387424468994, "step": 160 }, { "epoch": 0.01631591694178532, "loss_breakdown/lm_loss": 0.47183480858802795, "loss_breakdown/pointer_loss": 0.7638391256332397, "step": 160 }, { "epoch": 0.01631591694178532, "loss_breakdown/lm_loss": 0.4737981855869293, "loss_breakdown/pointer_loss": 0.17795975506305695, "step": 160 }, { "epoch": 0.0173356617506469, "grad_norm": 22.068659189835167, "learning_rate": 8.613659531090725e-07, "loss": 0.6225, "step": 170 }, { "epoch": 0.0173356617506469, "loss_breakdown/lm_loss": 0.016549861058592796, "loss_breakdown/pointer_loss": 0.814637303352356, "step": 170 }, { "epoch": 0.0173356617506469, "loss_breakdown/lm_loss": 0.02084779180586338, "loss_breakdown/pointer_loss": 0.39283791184425354, "step": 170 }, { "epoch": 0.0173356617506469, "loss_breakdown/lm_loss": 0.019962504506111145, "loss_breakdown/pointer_loss": 1.267956018447876, "step": 170 }, { "epoch": 0.0173356617506469, "loss_breakdown/lm_loss": 0.01889854297041893, "loss_breakdown/pointer_loss": 0.8015948534011841, "step": 170 }, { "epoch": 0.0173356617506469, "loss_breakdown/lm_loss": 0.02673494629561901, "loss_breakdown/pointer_loss": 0.739992618560791, "step": 170 }, { "epoch": 0.0173356617506469, "loss_breakdown/lm_loss": 0.019315458834171295, "loss_breakdown/pointer_loss": 0.48419129848480225, "step": 170 }, { "epoch": 0.0173356617506469, "loss_breakdown/lm_loss": 0.021172337234020233, "loss_breakdown/pointer_loss": 0.8151066303253174, "step": 170 }, { "epoch": 0.0173356617506469, "loss_breakdown/lm_loss": 0.02274957299232483, "loss_breakdown/pointer_loss": 0.5831974744796753, "step": 170 }, { "epoch": 0.018355406559508482, "grad_norm": 39.79608503009726, "learning_rate": 9.123343527013252e-07, "loss": 0.4275, "step": 180 }, { "epoch": 0.018355406559508482, "loss_breakdown/lm_loss": 0.0180575679987669, "loss_breakdown/pointer_loss": 1.0580620765686035, "step": 180 }, { "epoch": 0.018355406559508482, "loss_breakdown/lm_loss": 0.014364469796419144, "loss_breakdown/pointer_loss": 0.31832247972488403, "step": 180 }, { "epoch": 0.018355406559508482, "loss_breakdown/lm_loss": 0.01688387431204319, "loss_breakdown/pointer_loss": 0.5897045731544495, "step": 180 }, { "epoch": 0.018355406559508482, "loss_breakdown/lm_loss": 0.015556861646473408, "loss_breakdown/pointer_loss": 0.17709311842918396, "step": 180 }, { "epoch": 0.018355406559508482, "loss_breakdown/lm_loss": 0.01671162061393261, "loss_breakdown/pointer_loss": 0.8145598769187927, "step": 180 }, { "epoch": 0.018355406559508482, "loss_breakdown/lm_loss": 0.014449874870479107, "loss_breakdown/pointer_loss": 2.363811731338501, "step": 180 }, { "epoch": 0.018355406559508482, "loss_breakdown/lm_loss": 0.01364304218441248, "loss_breakdown/pointer_loss": 0.541179895401001, "step": 180 }, { "epoch": 0.018355406559508482, "loss_breakdown/lm_loss": 0.020331043750047684, "loss_breakdown/pointer_loss": 0.9517450332641602, "step": 180 }, { "epoch": 0.019375151368370064, "grad_norm": 10.305285123376729, "learning_rate": 9.633027522935782e-07, "loss": 0.449, "step": 190 }, { "epoch": 0.019375151368370064, "loss_breakdown/lm_loss": 0.0062951515428721905, "loss_breakdown/pointer_loss": 0.8085229396820068, "step": 190 }, { "epoch": 0.019375151368370064, "loss_breakdown/lm_loss": 0.005410932470113039, "loss_breakdown/pointer_loss": 0.37319642305374146, "step": 190 }, { "epoch": 0.019375151368370064, "loss_breakdown/lm_loss": 0.00725537957623601, "loss_breakdown/pointer_loss": 1.1929428577423096, "step": 190 }, { "epoch": 0.019375151368370064, "loss_breakdown/lm_loss": 0.005377820692956448, "loss_breakdown/pointer_loss": 0.37770605087280273, "step": 190 }, { "epoch": 0.019375151368370064, "loss_breakdown/lm_loss": 0.0059485356323421, "loss_breakdown/pointer_loss": 0.22923019528388977, "step": 190 }, { "epoch": 0.019375151368370064, "loss_breakdown/lm_loss": 0.006831211503595114, "loss_breakdown/pointer_loss": 0.21968233585357666, "step": 190 }, { "epoch": 0.019375151368370064, "loss_breakdown/lm_loss": 0.0066751777194440365, "loss_breakdown/pointer_loss": 0.9994868636131287, "step": 190 }, { "epoch": 0.019375151368370064, "loss_breakdown/lm_loss": 0.007901920937001705, "loss_breakdown/pointer_loss": 0.9455338716506958, "step": 190 }, { "epoch": 0.02039489617723165, "grad_norm": 22.6821437638931, "learning_rate": 1.014271151885831e-06, "loss": 0.4184, "step": 200 }, { "epoch": 0.02039489617723165, "loss_breakdown/lm_loss": 0.007565149571746588, "loss_breakdown/pointer_loss": 2.4777300357818604, "step": 200 }, { "epoch": 0.02039489617723165, "loss_breakdown/lm_loss": 0.005785479210317135, "loss_breakdown/pointer_loss": 1.0433319807052612, "step": 200 }, { "epoch": 0.02039489617723165, "loss_breakdown/lm_loss": 0.004865197464823723, "loss_breakdown/pointer_loss": 0.8207952976226807, "step": 200 }, { "epoch": 0.02039489617723165, "loss_breakdown/lm_loss": 0.004662915598601103, "loss_breakdown/pointer_loss": 0.513914942741394, "step": 200 }, { "epoch": 0.02039489617723165, "loss_breakdown/lm_loss": 0.00489435950294137, "loss_breakdown/pointer_loss": 0.8318243026733398, "step": 200 }, { "epoch": 0.02039489617723165, "loss_breakdown/lm_loss": 0.0044934069737792015, "loss_breakdown/pointer_loss": 0.6931023001670837, "step": 200 }, { "epoch": 0.02039489617723165, "loss_breakdown/lm_loss": 0.004709502216428518, "loss_breakdown/pointer_loss": 0.9542927742004395, "step": 200 }, { "epoch": 0.02039489617723165, "loss_breakdown/lm_loss": 0.00437081279233098, "loss_breakdown/pointer_loss": 0.3535591959953308, "step": 200 }, { "epoch": 0.02141464098609323, "grad_norm": 14.340163835128727, "learning_rate": 1.0652395514780836e-06, "loss": 0.3963, "step": 210 }, { "epoch": 0.02141464098609323, "loss_breakdown/lm_loss": 0.005601602140814066, "loss_breakdown/pointer_loss": 0.5726017951965332, "step": 210 }, { "epoch": 0.02141464098609323, "loss_breakdown/lm_loss": 0.005440936889499426, "loss_breakdown/pointer_loss": 0.9331775307655334, "step": 210 }, { "epoch": 0.02141464098609323, "loss_breakdown/lm_loss": 0.005166669841855764, "loss_breakdown/pointer_loss": 0.6443816423416138, "step": 210 }, { "epoch": 0.02141464098609323, "loss_breakdown/lm_loss": 0.004773283377289772, "loss_breakdown/pointer_loss": 1.288786768913269, "step": 210 }, { "epoch": 0.02141464098609323, "loss_breakdown/lm_loss": 0.005834249779582024, "loss_breakdown/pointer_loss": 0.47084909677505493, "step": 210 }, { "epoch": 0.02141464098609323, "loss_breakdown/lm_loss": 0.006343279965221882, "loss_breakdown/pointer_loss": 1.1716228723526, "step": 210 }, { "epoch": 0.02141464098609323, "loss_breakdown/lm_loss": 0.0046251374296844006, "loss_breakdown/pointer_loss": 0.2750582695007324, "step": 210 }, { "epoch": 0.02141464098609323, "loss_breakdown/lm_loss": 0.005650409962981939, "loss_breakdown/pointer_loss": 0.4598516821861267, "step": 210 }, { "epoch": 0.02243438579495481, "grad_norm": 9.452001127730131, "learning_rate": 1.1162079510703365e-06, "loss": 0.4351, "step": 220 }, { "epoch": 0.02243438579495481, "loss_breakdown/lm_loss": 0.003093753010034561, "loss_breakdown/pointer_loss": 0.3968861699104309, "step": 220 }, { "epoch": 0.02243438579495481, "loss_breakdown/lm_loss": 0.003288860432803631, "loss_breakdown/pointer_loss": 0.7698512077331543, "step": 220 }, { "epoch": 0.02243438579495481, "loss_breakdown/lm_loss": 0.003814903786405921, "loss_breakdown/pointer_loss": 0.7357092499732971, "step": 220 }, { "epoch": 0.02243438579495481, "loss_breakdown/lm_loss": 0.003015934256836772, "loss_breakdown/pointer_loss": 0.8082408905029297, "step": 220 }, { "epoch": 0.02243438579495481, "loss_breakdown/lm_loss": 0.003020069794729352, "loss_breakdown/pointer_loss": 0.6329753398895264, "step": 220 }, { "epoch": 0.02243438579495481, "loss_breakdown/lm_loss": 0.0030464595183730125, "loss_breakdown/pointer_loss": 0.703900933265686, "step": 220 }, { "epoch": 0.02243438579495481, "loss_breakdown/lm_loss": 0.003212010022252798, "loss_breakdown/pointer_loss": 1.0155844688415527, "step": 220 }, { "epoch": 0.02243438579495481, "loss_breakdown/lm_loss": 0.00355112599208951, "loss_breakdown/pointer_loss": 1.2669057846069336, "step": 220 }, { "epoch": 0.023454130603816396, "grad_norm": 14.248063546927492, "learning_rate": 1.1671763506625892e-06, "loss": 0.3931, "step": 230 }, { "epoch": 0.023454130603816396, "loss_breakdown/lm_loss": 0.003829717170447111, "loss_breakdown/pointer_loss": 0.8339337110519409, "step": 230 }, { "epoch": 0.023454130603816396, "loss_breakdown/lm_loss": 0.0038451324217021465, "loss_breakdown/pointer_loss": 0.10815232992172241, "step": 230 }, { "epoch": 0.023454130603816396, "loss_breakdown/lm_loss": 0.0035141606349498034, "loss_breakdown/pointer_loss": 0.2970955967903137, "step": 230 }, { "epoch": 0.023454130603816396, "loss_breakdown/lm_loss": 0.0038634007796645164, "loss_breakdown/pointer_loss": 0.9893010258674622, "step": 230 }, { "epoch": 0.023454130603816396, "loss_breakdown/lm_loss": 0.0036877200473099947, "loss_breakdown/pointer_loss": 0.803989052772522, "step": 230 }, { "epoch": 0.023454130603816396, "loss_breakdown/lm_loss": 0.004043654538691044, "loss_breakdown/pointer_loss": 0.19352513551712036, "step": 230 }, { "epoch": 0.023454130603816396, "loss_breakdown/lm_loss": 0.003357003442943096, "loss_breakdown/pointer_loss": 0.529827356338501, "step": 230 }, { "epoch": 0.023454130603816396, "loss_breakdown/lm_loss": 0.0034135961905121803, "loss_breakdown/pointer_loss": 4.518490791320801, "step": 230 }, { "epoch": 0.024473875412677978, "grad_norm": 15.002985590341977, "learning_rate": 1.218144750254842e-06, "loss": 0.4301, "step": 240 }, { "epoch": 0.024473875412677978, "loss_breakdown/lm_loss": 0.0031416669953614473, "loss_breakdown/pointer_loss": 0.6092102527618408, "step": 240 }, { "epoch": 0.024473875412677978, "loss_breakdown/lm_loss": 0.003309597261250019, "loss_breakdown/pointer_loss": 0.448506236076355, "step": 240 }, { "epoch": 0.024473875412677978, "loss_breakdown/lm_loss": 0.0026492781471461058, "loss_breakdown/pointer_loss": 0.47470027208328247, "step": 240 }, { "epoch": 0.024473875412677978, "loss_breakdown/lm_loss": 0.0030342929530888796, "loss_breakdown/pointer_loss": 0.6313989162445068, "step": 240 }, { "epoch": 0.024473875412677978, "loss_breakdown/lm_loss": 0.003117261454463005, "loss_breakdown/pointer_loss": 0.301455020904541, "step": 240 }, { "epoch": 0.024473875412677978, "loss_breakdown/lm_loss": 0.0030693330336362123, "loss_breakdown/pointer_loss": 0.5201837420463562, "step": 240 }, { "epoch": 0.024473875412677978, "loss_breakdown/lm_loss": 0.0031131613068282604, "loss_breakdown/pointer_loss": 3.464036464691162, "step": 240 }, { "epoch": 0.024473875412677978, "loss_breakdown/lm_loss": 0.003298610681667924, "loss_breakdown/pointer_loss": 0.4227966070175171, "step": 240 }, { "epoch": 0.02549362022153956, "grad_norm": 24.92099817432226, "learning_rate": 1.2691131498470948e-06, "loss": 0.3822, "step": 250 }, { "epoch": 0.02549362022153956, "loss_breakdown/lm_loss": 0.004206209443509579, "loss_breakdown/pointer_loss": 2.1810030937194824, "step": 250 }, { "epoch": 0.02549362022153956, "loss_breakdown/lm_loss": 0.0029529856983572245, "loss_breakdown/pointer_loss": 0.7169124484062195, "step": 250 }, { "epoch": 0.02549362022153956, "loss_breakdown/lm_loss": 0.002757791429758072, "loss_breakdown/pointer_loss": 1.8610212802886963, "step": 250 }, { "epoch": 0.02549362022153956, "loss_breakdown/lm_loss": 0.002777646528556943, "loss_breakdown/pointer_loss": 0.8566973209381104, "step": 250 }, { "epoch": 0.02549362022153956, "loss_breakdown/lm_loss": 0.0026723381597548723, "loss_breakdown/pointer_loss": 0.6722477078437805, "step": 250 }, { "epoch": 0.02549362022153956, "loss_breakdown/lm_loss": 0.0027594242710620165, "loss_breakdown/pointer_loss": 0.6450580358505249, "step": 250 }, { "epoch": 0.02549362022153956, "loss_breakdown/lm_loss": 0.003192733973264694, "loss_breakdown/pointer_loss": 1.2065560817718506, "step": 250 }, { "epoch": 0.02549362022153956, "loss_breakdown/lm_loss": 0.0025537784676998854, "loss_breakdown/pointer_loss": 0.8825907707214355, "step": 250 }, { "epoch": 0.02651336503040114, "grad_norm": 10.064552039055206, "learning_rate": 1.3200815494393477e-06, "loss": 0.4094, "step": 260 }, { "epoch": 0.02651336503040114, "loss_breakdown/lm_loss": 0.0027727314736694098, "loss_breakdown/pointer_loss": 1.0014393329620361, "step": 260 }, { "epoch": 0.02651336503040114, "loss_breakdown/lm_loss": 0.0027721081860363483, "loss_breakdown/pointer_loss": 0.34552937746047974, "step": 260 }, { "epoch": 0.02651336503040114, "loss_breakdown/lm_loss": 0.003817293094471097, "loss_breakdown/pointer_loss": 1.671890139579773, "step": 260 }, { "epoch": 0.02651336503040114, "loss_breakdown/lm_loss": 0.0028966115787625313, "loss_breakdown/pointer_loss": 0.46916189789772034, "step": 260 }, { "epoch": 0.02651336503040114, "loss_breakdown/lm_loss": 0.0035884189419448376, "loss_breakdown/pointer_loss": 0.46265295147895813, "step": 260 }, { "epoch": 0.02651336503040114, "loss_breakdown/lm_loss": 0.0034588868729770184, "loss_breakdown/pointer_loss": 0.9976310133934021, "step": 260 }, { "epoch": 0.02651336503040114, "loss_breakdown/lm_loss": 0.0030844833236187696, "loss_breakdown/pointer_loss": 0.49425309896469116, "step": 260 }, { "epoch": 0.02651336503040114, "loss_breakdown/lm_loss": 0.002500623930245638, "loss_breakdown/pointer_loss": 0.3601193130016327, "step": 260 }, { "epoch": 0.027533109839262725, "grad_norm": 8.432691292787249, "learning_rate": 1.3710499490316006e-06, "loss": 0.4353, "step": 270 }, { "epoch": 0.027533109839262725, "loss_breakdown/lm_loss": 0.0019099017372354865, "loss_breakdown/pointer_loss": 0.8270711898803711, "step": 270 }, { "epoch": 0.027533109839262725, "loss_breakdown/lm_loss": 0.0018887192709371448, "loss_breakdown/pointer_loss": 0.6696589589118958, "step": 270 }, { "epoch": 0.027533109839262725, "loss_breakdown/lm_loss": 0.0019389954395592213, "loss_breakdown/pointer_loss": 0.37267208099365234, "step": 270 }, { "epoch": 0.027533109839262725, "loss_breakdown/lm_loss": 0.0021515381522476673, "loss_breakdown/pointer_loss": 1.1811537742614746, "step": 270 }, { "epoch": 0.027533109839262725, "loss_breakdown/lm_loss": 0.0018208726542070508, "loss_breakdown/pointer_loss": 0.7799381017684937, "step": 270 }, { "epoch": 0.027533109839262725, "loss_breakdown/lm_loss": 0.002072787843644619, "loss_breakdown/pointer_loss": 0.5161221027374268, "step": 270 }, { "epoch": 0.027533109839262725, "loss_breakdown/lm_loss": 0.001970058772712946, "loss_breakdown/pointer_loss": 0.789218008518219, "step": 270 }, { "epoch": 0.027533109839262725, "loss_breakdown/lm_loss": 0.0019094097660854459, "loss_breakdown/pointer_loss": 1.5577056407928467, "step": 270 }, { "epoch": 0.028552854648124307, "grad_norm": 19.958603221607778, "learning_rate": 1.4220183486238535e-06, "loss": 0.3622, "step": 280 }, { "epoch": 0.028552854648124307, "loss_breakdown/lm_loss": 0.002372761955484748, "loss_breakdown/pointer_loss": 0.37569570541381836, "step": 280 }, { "epoch": 0.028552854648124307, "loss_breakdown/lm_loss": 0.0022314798552542925, "loss_breakdown/pointer_loss": 1.221300482749939, "step": 280 }, { "epoch": 0.028552854648124307, "loss_breakdown/lm_loss": 0.0020117887761443853, "loss_breakdown/pointer_loss": 2.156008720397949, "step": 280 }, { "epoch": 0.028552854648124307, "loss_breakdown/lm_loss": 0.0027392457704991102, "loss_breakdown/pointer_loss": 0.35537588596343994, "step": 280 }, { "epoch": 0.028552854648124307, "loss_breakdown/lm_loss": 0.0021644679363816977, "loss_breakdown/pointer_loss": 0.18219879269599915, "step": 280 }, { "epoch": 0.028552854648124307, "loss_breakdown/lm_loss": 0.0018620736664161086, "loss_breakdown/pointer_loss": 0.5294457077980042, "step": 280 }, { "epoch": 0.028552854648124307, "loss_breakdown/lm_loss": 0.0021340304519981146, "loss_breakdown/pointer_loss": 0.23821735382080078, "step": 280 }, { "epoch": 0.028552854648124307, "loss_breakdown/lm_loss": 0.002220253460109234, "loss_breakdown/pointer_loss": 0.5472993850708008, "step": 280 }, { "epoch": 0.029572599456985888, "grad_norm": 9.068487554698494, "learning_rate": 1.4729867482161062e-06, "loss": 0.4053, "step": 290 }, { "epoch": 0.029572599456985888, "loss_breakdown/lm_loss": 0.0017661458114162087, "loss_breakdown/pointer_loss": 0.6964536905288696, "step": 290 }, { "epoch": 0.029572599456985888, "loss_breakdown/lm_loss": 0.0016759209102019668, "loss_breakdown/pointer_loss": 0.32813698053359985, "step": 290 }, { "epoch": 0.029572599456985888, "loss_breakdown/lm_loss": 0.001648787991143763, "loss_breakdown/pointer_loss": 1.1278438568115234, "step": 290 }, { "epoch": 0.029572599456985888, "loss_breakdown/lm_loss": 0.0018878721166402102, "loss_breakdown/pointer_loss": 1.2449440956115723, "step": 290 }, { "epoch": 0.029572599456985888, "loss_breakdown/lm_loss": 0.0018330764723941684, "loss_breakdown/pointer_loss": 0.2648477554321289, "step": 290 }, { "epoch": 0.029572599456985888, "loss_breakdown/lm_loss": 0.0019555273465812206, "loss_breakdown/pointer_loss": 0.5165562629699707, "step": 290 }, { "epoch": 0.029572599456985888, "loss_breakdown/lm_loss": 0.0019268307369202375, "loss_breakdown/pointer_loss": 0.3531314730644226, "step": 290 }, { "epoch": 0.029572599456985888, "loss_breakdown/lm_loss": 0.0019048997201025486, "loss_breakdown/pointer_loss": 0.3926916718482971, "step": 290 }, { "epoch": 0.030592344265847473, "grad_norm": 58.99133108469292, "learning_rate": 1.5239551478083589e-06, "loss": 0.3904, "step": 300 }, { "epoch": 0.030592344265847473, "loss_breakdown/lm_loss": 0.0034661907702684402, "loss_breakdown/pointer_loss": 0.9482555389404297, "step": 300 }, { "epoch": 0.030592344265847473, "loss_breakdown/lm_loss": 0.003145545953884721, "loss_breakdown/pointer_loss": 2.529709815979004, "step": 300 }, { "epoch": 0.030592344265847473, "loss_breakdown/lm_loss": 0.0016435697907581925, "loss_breakdown/pointer_loss": 1.6725711822509766, "step": 300 }, { "epoch": 0.030592344265847473, "loss_breakdown/lm_loss": 0.0016942342044785619, "loss_breakdown/pointer_loss": 0.7845761179924011, "step": 300 }, { "epoch": 0.030592344265847473, "loss_breakdown/lm_loss": 0.0016876315930858254, "loss_breakdown/pointer_loss": 1.1593035459518433, "step": 300 }, { "epoch": 0.030592344265847473, "loss_breakdown/lm_loss": 0.0017115215305238962, "loss_breakdown/pointer_loss": 1.1330983638763428, "step": 300 }, { "epoch": 0.030592344265847473, "loss_breakdown/lm_loss": 0.0016669376054778695, "loss_breakdown/pointer_loss": 0.7897890210151672, "step": 300 }, { "epoch": 0.030592344265847473, "loss_breakdown/lm_loss": 0.0030168124940246344, "loss_breakdown/pointer_loss": 0.8190715312957764, "step": 300 }, { "epoch": 0.031612089074709054, "grad_norm": 7.760867483835419, "learning_rate": 1.5749235474006116e-06, "loss": 0.3852, "step": 310 }, { "epoch": 0.031612089074709054, "loss_breakdown/lm_loss": 0.0015999800525605679, "loss_breakdown/pointer_loss": 0.21356095373630524, "step": 310 }, { "epoch": 0.031612089074709054, "loss_breakdown/lm_loss": 0.0016974653117358685, "loss_breakdown/pointer_loss": 0.3236783742904663, "step": 310 }, { "epoch": 0.031612089074709054, "loss_breakdown/lm_loss": 0.001727008493617177, "loss_breakdown/pointer_loss": 0.10775449872016907, "step": 310 }, { "epoch": 0.031612089074709054, "loss_breakdown/lm_loss": 0.0016713733784854412, "loss_breakdown/pointer_loss": 0.17033997178077698, "step": 310 }, { "epoch": 0.031612089074709054, "loss_breakdown/lm_loss": 0.0017878625076264143, "loss_breakdown/pointer_loss": 0.15043093264102936, "step": 310 }, { "epoch": 0.031612089074709054, "loss_breakdown/lm_loss": 0.0016686570597812533, "loss_breakdown/pointer_loss": 0.2110898345708847, "step": 310 }, { "epoch": 0.031612089074709054, "loss_breakdown/lm_loss": 0.0019403377082198858, "loss_breakdown/pointer_loss": 1.073062539100647, "step": 310 }, { "epoch": 0.031612089074709054, "loss_breakdown/lm_loss": 0.001827193540520966, "loss_breakdown/pointer_loss": 0.38346362113952637, "step": 310 }, { "epoch": 0.03263183388357064, "grad_norm": 6.118368891884031, "learning_rate": 1.6258919469928647e-06, "loss": 0.3868, "step": 320 }, { "epoch": 0.03263183388357064, "loss_breakdown/lm_loss": 0.001329069840721786, "loss_breakdown/pointer_loss": 0.7385667562484741, "step": 320 }, { "epoch": 0.03263183388357064, "loss_breakdown/lm_loss": 0.0013621192192658782, "loss_breakdown/pointer_loss": 0.5762463808059692, "step": 320 }, { "epoch": 0.03263183388357064, "loss_breakdown/lm_loss": 0.001305326004512608, "loss_breakdown/pointer_loss": 0.46388527750968933, "step": 320 }, { "epoch": 0.03263183388357064, "loss_breakdown/lm_loss": 0.0013522880617529154, "loss_breakdown/pointer_loss": 0.7714365720748901, "step": 320 }, { "epoch": 0.03263183388357064, "loss_breakdown/lm_loss": 0.001204535597935319, "loss_breakdown/pointer_loss": 0.6548737287521362, "step": 320 }, { "epoch": 0.03263183388357064, "loss_breakdown/lm_loss": 0.0012140381149947643, "loss_breakdown/pointer_loss": 0.5980236530303955, "step": 320 }, { "epoch": 0.03263183388357064, "loss_breakdown/lm_loss": 0.0014190002111718059, "loss_breakdown/pointer_loss": 0.3702613413333893, "step": 320 }, { "epoch": 0.03263183388357064, "loss_breakdown/lm_loss": 0.0013303429586812854, "loss_breakdown/pointer_loss": 0.962455689907074, "step": 320 }, { "epoch": 0.03365157869243222, "grad_norm": 13.522069718112588, "learning_rate": 1.6768603465851174e-06, "loss": 0.3584, "step": 330 }, { "epoch": 0.03365157869243222, "loss_breakdown/lm_loss": 0.0021708884742110968, "loss_breakdown/pointer_loss": 1.215847373008728, "step": 330 }, { "epoch": 0.03365157869243222, "loss_breakdown/lm_loss": 0.0014267347287386656, "loss_breakdown/pointer_loss": 1.5992971658706665, "step": 330 }, { "epoch": 0.03365157869243222, "loss_breakdown/lm_loss": 0.002016682643443346, "loss_breakdown/pointer_loss": 1.153430461883545, "step": 330 }, { "epoch": 0.03365157869243222, "loss_breakdown/lm_loss": 0.001312202657572925, "loss_breakdown/pointer_loss": 0.38037413358688354, "step": 330 }, { "epoch": 0.03365157869243222, "loss_breakdown/lm_loss": 0.0022208131849765778, "loss_breakdown/pointer_loss": 0.38448214530944824, "step": 330 }, { "epoch": 0.03365157869243222, "loss_breakdown/lm_loss": 0.0013549485011026263, "loss_breakdown/pointer_loss": 0.39174985885620117, "step": 330 }, { "epoch": 0.03365157869243222, "loss_breakdown/lm_loss": 0.001245251507498324, "loss_breakdown/pointer_loss": 0.47603118419647217, "step": 330 }, { "epoch": 0.03365157869243222, "loss_breakdown/lm_loss": 0.0013114443281665444, "loss_breakdown/pointer_loss": 0.19895285367965698, "step": 330 }, { "epoch": 0.0346713235012938, "grad_norm": 6.87496273310416, "learning_rate": 1.7278287461773703e-06, "loss": 0.3849, "step": 340 }, { "epoch": 0.0346713235012938, "loss_breakdown/lm_loss": 0.001057290006428957, "loss_breakdown/pointer_loss": 0.6213214993476868, "step": 340 }, { "epoch": 0.0346713235012938, "loss_breakdown/lm_loss": 0.0011002500541508198, "loss_breakdown/pointer_loss": 1.4612972736358643, "step": 340 }, { "epoch": 0.0346713235012938, "loss_breakdown/lm_loss": 0.0011557643301784992, "loss_breakdown/pointer_loss": 0.45609351992607117, "step": 340 }, { "epoch": 0.0346713235012938, "loss_breakdown/lm_loss": 0.0012401083949953318, "loss_breakdown/pointer_loss": 0.9476908445358276, "step": 340 }, { "epoch": 0.0346713235012938, "loss_breakdown/lm_loss": 0.001139256521128118, "loss_breakdown/pointer_loss": 0.6203352212905884, "step": 340 }, { "epoch": 0.0346713235012938, "loss_breakdown/lm_loss": 0.00115509785246104, "loss_breakdown/pointer_loss": 0.4172579050064087, "step": 340 }, { "epoch": 0.0346713235012938, "loss_breakdown/lm_loss": 0.0014520386466756463, "loss_breakdown/pointer_loss": 0.6880930066108704, "step": 340 }, { "epoch": 0.0346713235012938, "loss_breakdown/lm_loss": 0.001499612582847476, "loss_breakdown/pointer_loss": 0.3047441244125366, "step": 340 }, { "epoch": 0.03569106831015539, "grad_norm": 31.95880933412718, "learning_rate": 1.778797145769623e-06, "loss": 0.3477, "step": 350 }, { "epoch": 0.03569106831015539, "loss_breakdown/lm_loss": 0.002619360340759158, "loss_breakdown/pointer_loss": 3.598085641860962, "step": 350 }, { "epoch": 0.03569106831015539, "loss_breakdown/lm_loss": 0.0015025330940261483, "loss_breakdown/pointer_loss": 1.1214540004730225, "step": 350 }, { "epoch": 0.03569106831015539, "loss_breakdown/lm_loss": 0.0014171380316838622, "loss_breakdown/pointer_loss": 0.4682113528251648, "step": 350 }, { "epoch": 0.03569106831015539, "loss_breakdown/lm_loss": 0.0013929210836067796, "loss_breakdown/pointer_loss": 0.9329819679260254, "step": 350 }, { "epoch": 0.03569106831015539, "loss_breakdown/lm_loss": 0.0012284473050385714, "loss_breakdown/pointer_loss": 0.5228589773178101, "step": 350 }, { "epoch": 0.03569106831015539, "loss_breakdown/lm_loss": 0.0009908133652061224, "loss_breakdown/pointer_loss": 0.35703837871551514, "step": 350 }, { "epoch": 0.03569106831015539, "loss_breakdown/lm_loss": 0.001029709354043007, "loss_breakdown/pointer_loss": 1.0529725551605225, "step": 350 }, { "epoch": 0.03569106831015539, "loss_breakdown/lm_loss": 0.0009982824558392167, "loss_breakdown/pointer_loss": 0.4684949219226837, "step": 350 }, { "epoch": 0.036710813119016965, "grad_norm": 10.476651974554665, "learning_rate": 1.8297655453618757e-06, "loss": 0.4167, "step": 360 }, { "epoch": 0.036710813119016965, "loss_breakdown/lm_loss": 0.001161233289167285, "loss_breakdown/pointer_loss": 0.7527960538864136, "step": 360 }, { "epoch": 0.036710813119016965, "loss_breakdown/lm_loss": 0.0010053232545033097, "loss_breakdown/pointer_loss": 0.1990932822227478, "step": 360 }, { "epoch": 0.036710813119016965, "loss_breakdown/lm_loss": 0.001246207277290523, "loss_breakdown/pointer_loss": 0.2958507835865021, "step": 360 }, { "epoch": 0.036710813119016965, "loss_breakdown/lm_loss": 0.0010965269757434726, "loss_breakdown/pointer_loss": 0.3882462978363037, "step": 360 }, { "epoch": 0.036710813119016965, "loss_breakdown/lm_loss": 0.0008974734228104353, "loss_breakdown/pointer_loss": 0.2637998163700104, "step": 360 }, { "epoch": 0.036710813119016965, "loss_breakdown/lm_loss": 0.0012080555316060781, "loss_breakdown/pointer_loss": 1.1950217485427856, "step": 360 }, { "epoch": 0.036710813119016965, "loss_breakdown/lm_loss": 0.0009437323315069079, "loss_breakdown/pointer_loss": 0.4192771911621094, "step": 360 }, { "epoch": 0.036710813119016965, "loss_breakdown/lm_loss": 0.0008175949333235621, "loss_breakdown/pointer_loss": 0.17014791071414948, "step": 360 }, { "epoch": 0.03773055792787855, "grad_norm": 5.063320430583013, "learning_rate": 1.8807339449541288e-06, "loss": 0.3767, "step": 370 }, { "epoch": 0.03773055792787855, "loss_breakdown/lm_loss": 0.0009427358745597303, "loss_breakdown/pointer_loss": 0.7072398066520691, "step": 370 }, { "epoch": 0.03773055792787855, "loss_breakdown/lm_loss": 0.0009714883635751903, "loss_breakdown/pointer_loss": 0.5362037420272827, "step": 370 }, { "epoch": 0.03773055792787855, "loss_breakdown/lm_loss": 0.0009318740339949727, "loss_breakdown/pointer_loss": 0.4954662024974823, "step": 370 }, { "epoch": 0.03773055792787855, "loss_breakdown/lm_loss": 0.0009000668651424348, "loss_breakdown/pointer_loss": 0.403382807970047, "step": 370 }, { "epoch": 0.03773055792787855, "loss_breakdown/lm_loss": 0.0009302237303927541, "loss_breakdown/pointer_loss": 1.0962371826171875, "step": 370 }, { "epoch": 0.03773055792787855, "loss_breakdown/lm_loss": 0.0009624594240449369, "loss_breakdown/pointer_loss": 0.3263820707798004, "step": 370 }, { "epoch": 0.03773055792787855, "loss_breakdown/lm_loss": 0.0009159116889350116, "loss_breakdown/pointer_loss": 0.3625091314315796, "step": 370 }, { "epoch": 0.03773055792787855, "loss_breakdown/lm_loss": 0.0009881268488243222, "loss_breakdown/pointer_loss": 0.5767502784729004, "step": 370 }, { "epoch": 0.03875030273674013, "grad_norm": 7.826331811703801, "learning_rate": 1.9317023445463812e-06, "loss": 0.3419, "step": 380 }, { "epoch": 0.03875030273674013, "loss_breakdown/lm_loss": 0.0009167414391413331, "loss_breakdown/pointer_loss": 1.60537588596344, "step": 380 }, { "epoch": 0.03875030273674013, "loss_breakdown/lm_loss": 0.0009810152696445584, "loss_breakdown/pointer_loss": 2.619699001312256, "step": 380 }, { "epoch": 0.03875030273674013, "loss_breakdown/lm_loss": 0.0009190402342937887, "loss_breakdown/pointer_loss": 1.089131236076355, "step": 380 }, { "epoch": 0.03875030273674013, "loss_breakdown/lm_loss": 0.0008466834551654756, "loss_breakdown/pointer_loss": 0.279827356338501, "step": 380 }, { "epoch": 0.03875030273674013, "loss_breakdown/lm_loss": 0.0011933663627132773, "loss_breakdown/pointer_loss": 1.739917278289795, "step": 380 }, { "epoch": 0.03875030273674013, "loss_breakdown/lm_loss": 0.0009979939786717296, "loss_breakdown/pointer_loss": 1.1446645259857178, "step": 380 }, { "epoch": 0.03875030273674013, "loss_breakdown/lm_loss": 0.0007028834079392254, "loss_breakdown/pointer_loss": 0.30903327465057373, "step": 380 }, { "epoch": 0.03875030273674013, "loss_breakdown/lm_loss": 0.0009025559993460774, "loss_breakdown/pointer_loss": 0.17947763204574585, "step": 380 }, { "epoch": 0.03977004754560171, "grad_norm": 5.7391216310692785, "learning_rate": 1.982670744138634e-06, "loss": 0.4017, "step": 390 }, { "epoch": 0.03977004754560171, "loss_breakdown/lm_loss": 0.0007083045202307403, "loss_breakdown/pointer_loss": 0.9326984286308289, "step": 390 }, { "epoch": 0.03977004754560171, "loss_breakdown/lm_loss": 0.0007084449171088636, "loss_breakdown/pointer_loss": 0.23321019113063812, "step": 390 }, { "epoch": 0.03977004754560171, "loss_breakdown/lm_loss": 0.0007970596197992563, "loss_breakdown/pointer_loss": 1.0246140956878662, "step": 390 }, { "epoch": 0.03977004754560171, "loss_breakdown/lm_loss": 0.001038924092426896, "loss_breakdown/pointer_loss": 0.4047107398509979, "step": 390 }, { "epoch": 0.03977004754560171, "loss_breakdown/lm_loss": 0.0008682970074005425, "loss_breakdown/pointer_loss": 0.6741926670074463, "step": 390 }, { "epoch": 0.03977004754560171, "loss_breakdown/lm_loss": 0.0006773971254006028, "loss_breakdown/pointer_loss": 0.4400741755962372, "step": 390 }, { "epoch": 0.03977004754560171, "loss_breakdown/lm_loss": 0.0008530122577212751, "loss_breakdown/pointer_loss": 0.46371981501579285, "step": 390 }, { "epoch": 0.03977004754560171, "loss_breakdown/lm_loss": 0.0007454408332705498, "loss_breakdown/pointer_loss": 0.33818671107292175, "step": 390 }, { "epoch": 0.0407897923544633, "grad_norm": 9.615403732970718, "learning_rate": 2.033639143730887e-06, "loss": 0.3549, "step": 400 }, { "epoch": 0.0407897923544633, "loss_breakdown/lm_loss": 0.001418184139765799, "loss_breakdown/pointer_loss": 1.0218859910964966, "step": 400 }, { "epoch": 0.0407897923544633, "loss_breakdown/lm_loss": 0.0011133933439850807, "loss_breakdown/pointer_loss": 0.7415446639060974, "step": 400 }, { "epoch": 0.0407897923544633, "loss_breakdown/lm_loss": 0.0008748107356950641, "loss_breakdown/pointer_loss": 1.268225073814392, "step": 400 }, { "epoch": 0.0407897923544633, "loss_breakdown/lm_loss": 0.0007764773326925933, "loss_breakdown/pointer_loss": 0.8630973696708679, "step": 400 }, { "epoch": 0.0407897923544633, "loss_breakdown/lm_loss": 0.000818517291918397, "loss_breakdown/pointer_loss": 0.579319179058075, "step": 400 }, { "epoch": 0.0407897923544633, "loss_breakdown/lm_loss": 0.0007056877366267145, "loss_breakdown/pointer_loss": 1.1932802200317383, "step": 400 }, { "epoch": 0.0407897923544633, "loss_breakdown/lm_loss": 0.0007746531045995653, "loss_breakdown/pointer_loss": 0.47154366970062256, "step": 400 }, { "epoch": 0.0407897923544633, "loss_breakdown/lm_loss": 0.0007159283268265426, "loss_breakdown/pointer_loss": 0.673617959022522, "step": 400 }, { "epoch": 0.041809537163324875, "grad_norm": 8.45997145868462, "learning_rate": 2.0846075433231395e-06, "loss": 0.3713, "step": 410 }, { "epoch": 0.041809537163324875, "loss_breakdown/lm_loss": 0.0006718423683196306, "loss_breakdown/pointer_loss": 0.2604365348815918, "step": 410 }, { "epoch": 0.041809537163324875, "loss_breakdown/lm_loss": 0.0009350907057523727, "loss_breakdown/pointer_loss": 0.42594295740127563, "step": 410 }, { "epoch": 0.041809537163324875, "loss_breakdown/lm_loss": 0.0007135617197491229, "loss_breakdown/pointer_loss": 0.48052072525024414, "step": 410 }, { "epoch": 0.041809537163324875, "loss_breakdown/lm_loss": 0.0007251354400068521, "loss_breakdown/pointer_loss": 1.162750005722046, "step": 410 }, { "epoch": 0.041809537163324875, "loss_breakdown/lm_loss": 0.0008395478362217546, "loss_breakdown/pointer_loss": 0.6389744877815247, "step": 410 }, { "epoch": 0.041809537163324875, "loss_breakdown/lm_loss": 0.0007419893518090248, "loss_breakdown/pointer_loss": 0.501017689704895, "step": 410 }, { "epoch": 0.041809537163324875, "loss_breakdown/lm_loss": 0.0007587155560031533, "loss_breakdown/pointer_loss": 0.2240687906742096, "step": 410 }, { "epoch": 0.041809537163324875, "loss_breakdown/lm_loss": 0.0008061161497607827, "loss_breakdown/pointer_loss": 0.38736164569854736, "step": 410 }, { "epoch": 0.04282928197218646, "grad_norm": 5.986492767818636, "learning_rate": 2.135575942915393e-06, "loss": 0.3951, "step": 420 }, { "epoch": 0.04282928197218646, "loss_breakdown/lm_loss": 0.0006562078488059342, "loss_breakdown/pointer_loss": 0.6301571130752563, "step": 420 }, { "epoch": 0.04282928197218646, "loss_breakdown/lm_loss": 0.0006853309459984303, "loss_breakdown/pointer_loss": 0.4527450203895569, "step": 420 }, { "epoch": 0.04282928197218646, "loss_breakdown/lm_loss": 0.0006353775388561189, "loss_breakdown/pointer_loss": 0.46280884742736816, "step": 420 }, { "epoch": 0.04282928197218646, "loss_breakdown/lm_loss": 0.0005845190025866032, "loss_breakdown/pointer_loss": 1.0148634910583496, "step": 420 }, { "epoch": 0.04282928197218646, "loss_breakdown/lm_loss": 0.0006917015416547656, "loss_breakdown/pointer_loss": 1.9470300674438477, "step": 420 }, { "epoch": 0.04282928197218646, "loss_breakdown/lm_loss": 0.0006744459387846291, "loss_breakdown/pointer_loss": 0.27807897329330444, "step": 420 }, { "epoch": 0.04282928197218646, "loss_breakdown/lm_loss": 0.0007025481900200248, "loss_breakdown/pointer_loss": 0.5562300086021423, "step": 420 }, { "epoch": 0.04282928197218646, "loss_breakdown/lm_loss": 0.0006461543380282819, "loss_breakdown/pointer_loss": 0.2699858248233795, "step": 420 }, { "epoch": 0.043849026781048045, "grad_norm": 12.3342485795291, "learning_rate": 2.1865443425076453e-06, "loss": 0.3651, "step": 430 }, { "epoch": 0.043849026781048045, "loss_breakdown/lm_loss": 0.0009562279446981847, "loss_breakdown/pointer_loss": 0.14910632371902466, "step": 430 }, { "epoch": 0.043849026781048045, "loss_breakdown/lm_loss": 0.0006248440477065742, "loss_breakdown/pointer_loss": 0.4760872721672058, "step": 430 }, { "epoch": 0.043849026781048045, "loss_breakdown/lm_loss": 0.000743627198971808, "loss_breakdown/pointer_loss": 0.3785964548587799, "step": 430 }, { "epoch": 0.043849026781048045, "loss_breakdown/lm_loss": 0.0006926950882188976, "loss_breakdown/pointer_loss": 0.16209536790847778, "step": 430 }, { "epoch": 0.043849026781048045, "loss_breakdown/lm_loss": 0.0007465163362212479, "loss_breakdown/pointer_loss": 3.002133846282959, "step": 430 }, { "epoch": 0.043849026781048045, "loss_breakdown/lm_loss": 0.0006999896140769124, "loss_breakdown/pointer_loss": 0.13051939010620117, "step": 430 }, { "epoch": 0.043849026781048045, "loss_breakdown/lm_loss": 0.0007813653792254627, "loss_breakdown/pointer_loss": 0.421925812959671, "step": 430 }, { "epoch": 0.043849026781048045, "loss_breakdown/lm_loss": 0.0006872834055684507, "loss_breakdown/pointer_loss": 2.587299346923828, "step": 430 }, { "epoch": 0.04486877158990962, "grad_norm": 6.086803364490919, "learning_rate": 2.2375127420998982e-06, "loss": 0.3807, "step": 440 }, { "epoch": 0.04486877158990962, "loss_breakdown/lm_loss": 0.00048382283421233296, "loss_breakdown/pointer_loss": 0.37868550419807434, "step": 440 }, { "epoch": 0.04486877158990962, "loss_breakdown/lm_loss": 0.000540569017175585, "loss_breakdown/pointer_loss": 0.3002944886684418, "step": 440 }, { "epoch": 0.04486877158990962, "loss_breakdown/lm_loss": 0.0005486732115969062, "loss_breakdown/pointer_loss": 0.5107450485229492, "step": 440 }, { "epoch": 0.04486877158990962, "loss_breakdown/lm_loss": 0.0005011126631870866, "loss_breakdown/pointer_loss": 0.30066341161727905, "step": 440 }, { "epoch": 0.04486877158990962, "loss_breakdown/lm_loss": 0.0006562577909789979, "loss_breakdown/pointer_loss": 1.2298260927200317, "step": 440 }, { "epoch": 0.04486877158990962, "loss_breakdown/lm_loss": 0.0005293831927701831, "loss_breakdown/pointer_loss": 0.5762923955917358, "step": 440 }, { "epoch": 0.04486877158990962, "loss_breakdown/lm_loss": 0.0007097002235241234, "loss_breakdown/pointer_loss": 1.5116071701049805, "step": 440 }, { "epoch": 0.04486877158990962, "loss_breakdown/lm_loss": 0.0005303304642438889, "loss_breakdown/pointer_loss": 0.47199225425720215, "step": 440 }, { "epoch": 0.04588851639877121, "grad_norm": 11.131006140059414, "learning_rate": 2.288481141692151e-06, "loss": 0.3591, "step": 450 }, { "epoch": 0.04588851639877121, "loss_breakdown/lm_loss": 0.0009805663721635938, "loss_breakdown/pointer_loss": 1.9173195362091064, "step": 450 }, { "epoch": 0.04588851639877121, "loss_breakdown/lm_loss": 0.0006133939605206251, "loss_breakdown/pointer_loss": 1.3847227096557617, "step": 450 }, { "epoch": 0.04588851639877121, "loss_breakdown/lm_loss": 0.0007368716760538518, "loss_breakdown/pointer_loss": 0.5668754577636719, "step": 450 }, { "epoch": 0.04588851639877121, "loss_breakdown/lm_loss": 0.0005693294806405902, "loss_breakdown/pointer_loss": 1.2675347328186035, "step": 450 }, { "epoch": 0.04588851639877121, "loss_breakdown/lm_loss": 0.0005565799074247479, "loss_breakdown/pointer_loss": 1.349981427192688, "step": 450 }, { "epoch": 0.04588851639877121, "loss_breakdown/lm_loss": 0.0005845350096933544, "loss_breakdown/pointer_loss": 0.6794165372848511, "step": 450 }, { "epoch": 0.04588851639877121, "loss_breakdown/lm_loss": 0.0005695060244761407, "loss_breakdown/pointer_loss": 0.5557557940483093, "step": 450 }, { "epoch": 0.04588851639877121, "loss_breakdown/lm_loss": 0.00054246059153229, "loss_breakdown/pointer_loss": 0.9587036371231079, "step": 450 }, { "epoch": 0.04690826120763279, "grad_norm": 6.392082274296561, "learning_rate": 2.339449541284404e-06, "loss": 0.3744, "step": 460 }, { "epoch": 0.04690826120763279, "loss_breakdown/lm_loss": 0.0005402075476013124, "loss_breakdown/pointer_loss": 0.5006047487258911, "step": 460 }, { "epoch": 0.04690826120763279, "loss_breakdown/lm_loss": 0.0007237491081468761, "loss_breakdown/pointer_loss": 1.236380934715271, "step": 460 }, { "epoch": 0.04690826120763279, "loss_breakdown/lm_loss": 0.0005020907265134156, "loss_breakdown/pointer_loss": 0.37311244010925293, "step": 460 }, { "epoch": 0.04690826120763279, "loss_breakdown/lm_loss": 0.0005357834743335843, "loss_breakdown/pointer_loss": 0.5927277207374573, "step": 460 }, { "epoch": 0.04690826120763279, "loss_breakdown/lm_loss": 0.0006022356683388352, "loss_breakdown/pointer_loss": 0.33308112621307373, "step": 460 }, { "epoch": 0.04690826120763279, "loss_breakdown/lm_loss": 0.0005202224128879607, "loss_breakdown/pointer_loss": 0.8544079661369324, "step": 460 }, { "epoch": 0.04690826120763279, "loss_breakdown/lm_loss": 0.0005278691533021629, "loss_breakdown/pointer_loss": 0.22408032417297363, "step": 460 }, { "epoch": 0.04690826120763279, "loss_breakdown/lm_loss": 0.0004756460548378527, "loss_breakdown/pointer_loss": 0.22823531925678253, "step": 460 }, { "epoch": 0.04792800601649437, "grad_norm": 5.063691189304351, "learning_rate": 2.390417940876657e-06, "loss": 0.3787, "step": 470 }, { "epoch": 0.04792800601649437, "loss_breakdown/lm_loss": 0.00044447972322814167, "loss_breakdown/pointer_loss": 0.36928653717041016, "step": 470 }, { "epoch": 0.04792800601649437, "loss_breakdown/lm_loss": 0.00044617336243391037, "loss_breakdown/pointer_loss": 1.3105762004852295, "step": 470 }, { "epoch": 0.04792800601649437, "loss_breakdown/lm_loss": 0.000427092716563493, "loss_breakdown/pointer_loss": 0.4617704749107361, "step": 470 }, { "epoch": 0.04792800601649437, "loss_breakdown/lm_loss": 0.0004858141182921827, "loss_breakdown/pointer_loss": 0.44962769746780396, "step": 470 }, { "epoch": 0.04792800601649437, "loss_breakdown/lm_loss": 0.0003791343478951603, "loss_breakdown/pointer_loss": 0.6858021020889282, "step": 470 }, { "epoch": 0.04792800601649437, "loss_breakdown/lm_loss": 0.00048564892495051026, "loss_breakdown/pointer_loss": 0.3951139450073242, "step": 470 }, { "epoch": 0.04792800601649437, "loss_breakdown/lm_loss": 0.0005209880182519555, "loss_breakdown/pointer_loss": 0.3357059359550476, "step": 470 }, { "epoch": 0.04792800601649437, "loss_breakdown/lm_loss": 0.0004043001390527934, "loss_breakdown/pointer_loss": 0.25742626190185547, "step": 470 }, { "epoch": 0.048947750825355955, "grad_norm": 11.993517996719232, "learning_rate": 2.4413863404689094e-06, "loss": 0.3544, "step": 480 }, { "epoch": 0.048947750825355955, "loss_breakdown/lm_loss": 0.0004694217932410538, "loss_breakdown/pointer_loss": 0.18720203638076782, "step": 480 }, { "epoch": 0.048947750825355955, "loss_breakdown/lm_loss": 0.0004977909266017377, "loss_breakdown/pointer_loss": 0.5130826830863953, "step": 480 }, { "epoch": 0.048947750825355955, "loss_breakdown/lm_loss": 0.0005333219887688756, "loss_breakdown/pointer_loss": 0.6379256248474121, "step": 480 }, { "epoch": 0.048947750825355955, "loss_breakdown/lm_loss": 0.00044297947897575796, "loss_breakdown/pointer_loss": 0.06592095643281937, "step": 480 }, { "epoch": 0.048947750825355955, "loss_breakdown/lm_loss": 0.0005862672696821392, "loss_breakdown/pointer_loss": 0.4455360174179077, "step": 480 }, { "epoch": 0.048947750825355955, "loss_breakdown/lm_loss": 0.0004542176320683211, "loss_breakdown/pointer_loss": 0.14552102982997894, "step": 480 }, { "epoch": 0.048947750825355955, "loss_breakdown/lm_loss": 0.0005541719729080796, "loss_breakdown/pointer_loss": 0.666612982749939, "step": 480 }, { "epoch": 0.048947750825355955, "loss_breakdown/lm_loss": 0.0005588653148151934, "loss_breakdown/pointer_loss": 0.9129581451416016, "step": 480 }, { "epoch": 0.04996749563421754, "grad_norm": 6.924696603134451, "learning_rate": 2.4923547400611623e-06, "loss": 0.3952, "step": 490 }, { "epoch": 0.04996749563421754, "loss_breakdown/lm_loss": 0.00044676766265183687, "loss_breakdown/pointer_loss": 0.4351302981376648, "step": 490 }, { "epoch": 0.04996749563421754, "loss_breakdown/lm_loss": 0.00037755051744170487, "loss_breakdown/pointer_loss": 0.412767618894577, "step": 490 }, { "epoch": 0.04996749563421754, "loss_breakdown/lm_loss": 0.000534193473868072, "loss_breakdown/pointer_loss": 1.7377482652664185, "step": 490 }, { "epoch": 0.04996749563421754, "loss_breakdown/lm_loss": 0.0005495342775247991, "loss_breakdown/pointer_loss": 1.0613528490066528, "step": 490 }, { "epoch": 0.04996749563421754, "loss_breakdown/lm_loss": 0.0005288274842314422, "loss_breakdown/pointer_loss": 0.8015437126159668, "step": 490 }, { "epoch": 0.04996749563421754, "loss_breakdown/lm_loss": 0.0004528157878667116, "loss_breakdown/pointer_loss": 0.3964918851852417, "step": 490 }, { "epoch": 0.04996749563421754, "loss_breakdown/lm_loss": 0.0003840423305518925, "loss_breakdown/pointer_loss": 0.3680000305175781, "step": 490 }, { "epoch": 0.04996749563421754, "loss_breakdown/lm_loss": 0.00045434266212396324, "loss_breakdown/pointer_loss": 0.3488462567329407, "step": 490 }, { "epoch": 0.05098724044307912, "grad_norm": 12.365588359279533, "learning_rate": 2.5433231396534152e-06, "loss": 0.3725, "step": 500 }, { "epoch": 0.05098724044307912, "loss_breakdown/lm_loss": 0.0007989371661096811, "loss_breakdown/pointer_loss": 2.6653151512145996, "step": 500 }, { "epoch": 0.05098724044307912, "loss_breakdown/lm_loss": 0.0006473925895988941, "loss_breakdown/pointer_loss": 0.5413621664047241, "step": 500 }, { "epoch": 0.05098724044307912, "loss_breakdown/lm_loss": 0.0004081313090864569, "loss_breakdown/pointer_loss": 1.0003786087036133, "step": 500 }, { "epoch": 0.05098724044307912, "loss_breakdown/lm_loss": 0.0006265999400056899, "loss_breakdown/pointer_loss": 0.3986154794692993, "step": 500 }, { "epoch": 0.05098724044307912, "loss_breakdown/lm_loss": 0.00040951912524178624, "loss_breakdown/pointer_loss": 0.48195385932922363, "step": 500 }, { "epoch": 0.05098724044307912, "loss_breakdown/lm_loss": 0.0004829832469113171, "loss_breakdown/pointer_loss": 0.6382269859313965, "step": 500 }, { "epoch": 0.05098724044307912, "loss_breakdown/lm_loss": 0.0004977986100129783, "loss_breakdown/pointer_loss": 0.5688208341598511, "step": 500 }, { "epoch": 0.05098724044307912, "loss_breakdown/lm_loss": 0.0003971347468905151, "loss_breakdown/pointer_loss": 0.321796178817749, "step": 500 }, { "epoch": 0.0520069852519407, "grad_norm": 6.352794502337882, "learning_rate": 2.594291539245668e-06, "loss": 0.3707, "step": 510 }, { "epoch": 0.0520069852519407, "loss_breakdown/lm_loss": 0.0004017514002043754, "loss_breakdown/pointer_loss": 0.37418586015701294, "step": 510 }, { "epoch": 0.0520069852519407, "loss_breakdown/lm_loss": 0.0005233926349319518, "loss_breakdown/pointer_loss": 0.46363556385040283, "step": 510 }, { "epoch": 0.0520069852519407, "loss_breakdown/lm_loss": 0.0004173495981376618, "loss_breakdown/pointer_loss": 0.2655513286590576, "step": 510 }, { "epoch": 0.0520069852519407, "loss_breakdown/lm_loss": 0.0004067224799655378, "loss_breakdown/pointer_loss": 0.9832513332366943, "step": 510 }, { "epoch": 0.0520069852519407, "loss_breakdown/lm_loss": 0.0004131339373998344, "loss_breakdown/pointer_loss": 0.35496821999549866, "step": 510 }, { "epoch": 0.0520069852519407, "loss_breakdown/lm_loss": 0.00035534161725081503, "loss_breakdown/pointer_loss": 0.39409899711608887, "step": 510 }, { "epoch": 0.0520069852519407, "loss_breakdown/lm_loss": 0.0005157389095984399, "loss_breakdown/pointer_loss": 0.2971224784851074, "step": 510 }, { "epoch": 0.0520069852519407, "loss_breakdown/lm_loss": 0.000534426944795996, "loss_breakdown/pointer_loss": 0.46690231561660767, "step": 510 }, { "epoch": 0.05302673006080228, "grad_norm": 3.7055450263771177, "learning_rate": 2.6452599388379206e-06, "loss": 0.3488, "step": 520 }, { "epoch": 0.05302673006080228, "loss_breakdown/lm_loss": 0.0003222555387765169, "loss_breakdown/pointer_loss": 0.8467929363250732, "step": 520 }, { "epoch": 0.05302673006080228, "loss_breakdown/lm_loss": 0.00028766473405994475, "loss_breakdown/pointer_loss": 0.8588017821311951, "step": 520 }, { "epoch": 0.05302673006080228, "loss_breakdown/lm_loss": 0.00028954370645806193, "loss_breakdown/pointer_loss": 0.7638702392578125, "step": 520 }, { "epoch": 0.05302673006080228, "loss_breakdown/lm_loss": 0.00030190686811693013, "loss_breakdown/pointer_loss": 0.5051649808883667, "step": 520 }, { "epoch": 0.05302673006080228, "loss_breakdown/lm_loss": 0.0003716126084327698, "loss_breakdown/pointer_loss": 0.33361274003982544, "step": 520 }, { "epoch": 0.05302673006080228, "loss_breakdown/lm_loss": 0.00031786051113158464, "loss_breakdown/pointer_loss": 0.7337746620178223, "step": 520 }, { "epoch": 0.05302673006080228, "loss_breakdown/lm_loss": 0.0003507736837491393, "loss_breakdown/pointer_loss": 0.6884447336196899, "step": 520 }, { "epoch": 0.05302673006080228, "loss_breakdown/lm_loss": 0.00032028209534473717, "loss_breakdown/pointer_loss": 0.35890668630599976, "step": 520 }, { "epoch": 0.054046474869663866, "grad_norm": 7.934242478195668, "learning_rate": 2.6962283384301735e-06, "loss": 0.3589, "step": 530 }, { "epoch": 0.054046474869663866, "loss_breakdown/lm_loss": 0.0003091700782533735, "loss_breakdown/pointer_loss": 0.44088849425315857, "step": 530 }, { "epoch": 0.054046474869663866, "loss_breakdown/lm_loss": 0.0003269686712883413, "loss_breakdown/pointer_loss": 0.45824047923088074, "step": 530 }, { "epoch": 0.054046474869663866, "loss_breakdown/lm_loss": 0.0003867927298415452, "loss_breakdown/pointer_loss": 0.36080533266067505, "step": 530 }, { "epoch": 0.054046474869663866, "loss_breakdown/lm_loss": 0.0004659619298763573, "loss_breakdown/pointer_loss": 0.19936808943748474, "step": 530 }, { "epoch": 0.054046474869663866, "loss_breakdown/lm_loss": 0.0003436952247284353, "loss_breakdown/pointer_loss": 0.23403501510620117, "step": 530 }, { "epoch": 0.054046474869663866, "loss_breakdown/lm_loss": 0.00032897148048505187, "loss_breakdown/pointer_loss": 0.6383318901062012, "step": 530 }, { "epoch": 0.054046474869663866, "loss_breakdown/lm_loss": 0.00033846282167360187, "loss_breakdown/pointer_loss": 0.38927650451660156, "step": 530 }, { "epoch": 0.054046474869663866, "loss_breakdown/lm_loss": 0.00039440713590011, "loss_breakdown/pointer_loss": 0.40530169010162354, "step": 530 }, { "epoch": 0.05506621967852545, "grad_norm": 4.224142465699685, "learning_rate": 2.7471967380224264e-06, "loss": 0.4138, "step": 540 }, { "epoch": 0.05506621967852545, "loss_breakdown/lm_loss": 0.00029378649196587503, "loss_breakdown/pointer_loss": 0.42276066541671753, "step": 540 }, { "epoch": 0.05506621967852545, "loss_breakdown/lm_loss": 0.00038314127596095204, "loss_breakdown/pointer_loss": 0.7653011083602905, "step": 540 }, { "epoch": 0.05506621967852545, "loss_breakdown/lm_loss": 0.0004450779524631798, "loss_breakdown/pointer_loss": 0.22228486835956573, "step": 540 }, { "epoch": 0.05506621967852545, "loss_breakdown/lm_loss": 0.00048597960267215967, "loss_breakdown/pointer_loss": 0.582461953163147, "step": 540 }, { "epoch": 0.05506621967852545, "loss_breakdown/lm_loss": 0.00033270474523305893, "loss_breakdown/pointer_loss": 0.4181790351867676, "step": 540 }, { "epoch": 0.05506621967852545, "loss_breakdown/lm_loss": 0.00033646373776718974, "loss_breakdown/pointer_loss": 0.4569823145866394, "step": 540 }, { "epoch": 0.05506621967852545, "loss_breakdown/lm_loss": 0.00036314286990091205, "loss_breakdown/pointer_loss": 0.21095359325408936, "step": 540 }, { "epoch": 0.05506621967852545, "loss_breakdown/lm_loss": 0.05835958197712898, "loss_breakdown/pointer_loss": 1.0772676467895508, "step": 540 }, { "epoch": 0.05608596448738703, "grad_norm": 9.078176217347576, "learning_rate": 2.798165137614679e-06, "loss": 0.3657, "step": 550 }, { "epoch": 0.05608596448738703, "loss_breakdown/lm_loss": 0.0013623975683003664, "loss_breakdown/pointer_loss": 4.0300774574279785, "step": 550 }, { "epoch": 0.05608596448738703, "loss_breakdown/lm_loss": 0.0008385309483855963, "loss_breakdown/pointer_loss": 1.420295238494873, "step": 550 }, { "epoch": 0.05608596448738703, "loss_breakdown/lm_loss": 0.0005609474028460681, "loss_breakdown/pointer_loss": 2.047574758529663, "step": 550 }, { "epoch": 0.05608596448738703, "loss_breakdown/lm_loss": 0.00042241744813509285, "loss_breakdown/pointer_loss": 0.7167174220085144, "step": 550 }, { "epoch": 0.05608596448738703, "loss_breakdown/lm_loss": 0.00047639996046200395, "loss_breakdown/pointer_loss": 0.7604405879974365, "step": 550 }, { "epoch": 0.05608596448738703, "loss_breakdown/lm_loss": 0.000489765137899667, "loss_breakdown/pointer_loss": 0.708173394203186, "step": 550 }, { "epoch": 0.05608596448738703, "loss_breakdown/lm_loss": 0.00041251807124353945, "loss_breakdown/pointer_loss": 0.518315315246582, "step": 550 }, { "epoch": 0.05608596448738703, "loss_breakdown/lm_loss": 0.00036738484050147235, "loss_breakdown/pointer_loss": 1.4866230487823486, "step": 550 }, { "epoch": 0.057105709296248613, "grad_norm": 9.590450650865593, "learning_rate": 2.849133537206932e-06, "loss": 0.3827, "step": 560 }, { "epoch": 0.057105709296248613, "loss_breakdown/lm_loss": 0.00035011779982596636, "loss_breakdown/pointer_loss": 0.2353854775428772, "step": 560 }, { "epoch": 0.057105709296248613, "loss_breakdown/lm_loss": 0.0005324665107764304, "loss_breakdown/pointer_loss": 2.6663496494293213, "step": 560 }, { "epoch": 0.057105709296248613, "loss_breakdown/lm_loss": 0.000522239541169256, "loss_breakdown/pointer_loss": 0.39908450841903687, "step": 560 }, { "epoch": 0.057105709296248613, "loss_breakdown/lm_loss": 0.0004026920651085675, "loss_breakdown/pointer_loss": 0.6088233590126038, "step": 560 }, { "epoch": 0.057105709296248613, "loss_breakdown/lm_loss": 0.000409457745263353, "loss_breakdown/pointer_loss": 2.80717396736145, "step": 560 }, { "epoch": 0.057105709296248613, "loss_breakdown/lm_loss": 0.0004241820133756846, "loss_breakdown/pointer_loss": 0.25479796528816223, "step": 560 }, { "epoch": 0.057105709296248613, "loss_breakdown/lm_loss": 0.00042445602593943477, "loss_breakdown/pointer_loss": 2.2367477416992188, "step": 560 }, { "epoch": 0.057105709296248613, "loss_breakdown/lm_loss": 0.00040494996937923133, "loss_breakdown/pointer_loss": 0.6475188732147217, "step": 560 }, { "epoch": 0.0581254541051102, "grad_norm": 4.724045507113711, "learning_rate": 2.9001019367991847e-06, "loss": 0.3696, "step": 570 }, { "epoch": 0.0581254541051102, "loss_breakdown/lm_loss": 0.0002592704549897462, "loss_breakdown/pointer_loss": 0.3944624662399292, "step": 570 }, { "epoch": 0.0581254541051102, "loss_breakdown/lm_loss": 0.0002565958129707724, "loss_breakdown/pointer_loss": 0.9081690311431885, "step": 570 }, { "epoch": 0.0581254541051102, "loss_breakdown/lm_loss": 0.0002544541785027832, "loss_breakdown/pointer_loss": 1.0618410110473633, "step": 570 }, { "epoch": 0.0581254541051102, "loss_breakdown/lm_loss": 0.0003008814819622785, "loss_breakdown/pointer_loss": 0.9317227602005005, "step": 570 }, { "epoch": 0.0581254541051102, "loss_breakdown/lm_loss": 0.00035016759647987783, "loss_breakdown/pointer_loss": 0.6123095154762268, "step": 570 }, { "epoch": 0.0581254541051102, "loss_breakdown/lm_loss": 0.00023832361330278218, "loss_breakdown/pointer_loss": 0.37819451093673706, "step": 570 }, { "epoch": 0.0581254541051102, "loss_breakdown/lm_loss": 0.00030528620118275285, "loss_breakdown/pointer_loss": 0.4935232996940613, "step": 570 }, { "epoch": 0.0581254541051102, "loss_breakdown/lm_loss": 0.0002705626538954675, "loss_breakdown/pointer_loss": 0.6365396976470947, "step": 570 }, { "epoch": 0.059145198913971776, "grad_norm": 10.731209495637284, "learning_rate": 2.9510703363914376e-06, "loss": 0.3501, "step": 580 }, { "epoch": 0.059145198913971776, "loss_breakdown/lm_loss": 0.00032296410063281655, "loss_breakdown/pointer_loss": 2.1997783184051514, "step": 580 }, { "epoch": 0.059145198913971776, "loss_breakdown/lm_loss": 0.0002555068058427423, "loss_breakdown/pointer_loss": 0.5107305645942688, "step": 580 }, { "epoch": 0.059145198913971776, "loss_breakdown/lm_loss": 0.00032133006607182324, "loss_breakdown/pointer_loss": 0.03807316720485687, "step": 580 }, { "epoch": 0.059145198913971776, "loss_breakdown/lm_loss": 0.0003065150522161275, "loss_breakdown/pointer_loss": 2.407393217086792, "step": 580 }, { "epoch": 0.059145198913971776, "loss_breakdown/lm_loss": 0.00027747650165110826, "loss_breakdown/pointer_loss": 0.34505510330200195, "step": 580 }, { "epoch": 0.059145198913971776, "loss_breakdown/lm_loss": 0.0002896167279686779, "loss_breakdown/pointer_loss": 0.16003187000751495, "step": 580 }, { "epoch": 0.059145198913971776, "loss_breakdown/lm_loss": 0.0003421693982090801, "loss_breakdown/pointer_loss": 0.2676714360713959, "step": 580 }, { "epoch": 0.059145198913971776, "loss_breakdown/lm_loss": 0.0005331560969352722, "loss_breakdown/pointer_loss": 0.2033371925354004, "step": 580 }, { "epoch": 0.06016494372283336, "grad_norm": 5.928313949102333, "learning_rate": 3.0020387359836905e-06, "loss": 0.3837, "step": 590 }, { "epoch": 0.06016494372283336, "loss_breakdown/lm_loss": 0.00025503261713311076, "loss_breakdown/pointer_loss": 1.2324310541152954, "step": 590 }, { "epoch": 0.06016494372283336, "loss_breakdown/lm_loss": 0.0002862636756617576, "loss_breakdown/pointer_loss": 0.7205149531364441, "step": 590 }, { "epoch": 0.06016494372283336, "loss_breakdown/lm_loss": 0.00022762510343454778, "loss_breakdown/pointer_loss": 0.6823477745056152, "step": 590 }, { "epoch": 0.06016494372283336, "loss_breakdown/lm_loss": 0.00023425403924193233, "loss_breakdown/pointer_loss": 0.7424026727676392, "step": 590 }, { "epoch": 0.06016494372283336, "loss_breakdown/lm_loss": 0.00028638375806622207, "loss_breakdown/pointer_loss": 0.46766021847724915, "step": 590 }, { "epoch": 0.06016494372283336, "loss_breakdown/lm_loss": 0.0003079356101807207, "loss_breakdown/pointer_loss": 1.138388991355896, "step": 590 }, { "epoch": 0.06016494372283336, "loss_breakdown/lm_loss": 0.00022922999050933868, "loss_breakdown/pointer_loss": 0.19208401441574097, "step": 590 }, { "epoch": 0.06016494372283336, "loss_breakdown/lm_loss": 0.0003135586157441139, "loss_breakdown/pointer_loss": 0.14621829986572266, "step": 590 }, { "epoch": 0.061184688531694946, "grad_norm": 10.527347796394555, "learning_rate": 3.0530071355759434e-06, "loss": 0.3458, "step": 600 }, { "epoch": 0.061184688531694946, "loss_breakdown/lm_loss": 0.0005131785292178392, "loss_breakdown/pointer_loss": 3.330268621444702, "step": 600 }, { "epoch": 0.061184688531694946, "loss_breakdown/lm_loss": 0.00031318326364271343, "loss_breakdown/pointer_loss": 0.6992675065994263, "step": 600 }, { "epoch": 0.061184688531694946, "loss_breakdown/lm_loss": 0.00023616731050424278, "loss_breakdown/pointer_loss": 1.0659555196762085, "step": 600 }, { "epoch": 0.061184688531694946, "loss_breakdown/lm_loss": 0.00026738762971945107, "loss_breakdown/pointer_loss": 1.2453306913375854, "step": 600 }, { "epoch": 0.061184688531694946, "loss_breakdown/lm_loss": 0.0002551913494244218, "loss_breakdown/pointer_loss": 0.6276149749755859, "step": 600 }, { "epoch": 0.061184688531694946, "loss_breakdown/lm_loss": 0.00028603198006749153, "loss_breakdown/pointer_loss": 0.825048565864563, "step": 600 }, { "epoch": 0.061184688531694946, "loss_breakdown/lm_loss": 0.00022373165120370686, "loss_breakdown/pointer_loss": 0.6556029319763184, "step": 600 }, { "epoch": 0.061184688531694946, "loss_breakdown/lm_loss": 0.00023794584558345377, "loss_breakdown/pointer_loss": 1.4674674272537231, "step": 600 }, { "epoch": 0.062204433340556524, "grad_norm": 7.630566008683153, "learning_rate": 3.103975535168196e-06, "loss": 0.37, "step": 610 }, { "epoch": 0.062204433340556524, "loss_breakdown/lm_loss": 0.00021655937598552555, "loss_breakdown/pointer_loss": 0.5396507978439331, "step": 610 }, { "epoch": 0.062204433340556524, "loss_breakdown/lm_loss": 0.00021763220138382167, "loss_breakdown/pointer_loss": 0.5193843841552734, "step": 610 }, { "epoch": 0.062204433340556524, "loss_breakdown/lm_loss": 0.00022003058984410018, "loss_breakdown/pointer_loss": 0.6597169041633606, "step": 610 }, { "epoch": 0.062204433340556524, "loss_breakdown/lm_loss": 0.00037289896863512695, "loss_breakdown/pointer_loss": 0.41537487506866455, "step": 610 }, { "epoch": 0.062204433340556524, "loss_breakdown/lm_loss": 0.0002109694469254464, "loss_breakdown/pointer_loss": 0.624483585357666, "step": 610 }, { "epoch": 0.062204433340556524, "loss_breakdown/lm_loss": 0.00022528359841089696, "loss_breakdown/pointer_loss": 0.4974147379398346, "step": 610 }, { "epoch": 0.062204433340556524, "loss_breakdown/lm_loss": 0.00032165058655664325, "loss_breakdown/pointer_loss": 1.9278547763824463, "step": 610 }, { "epoch": 0.062204433340556524, "loss_breakdown/lm_loss": 0.00033486541360616684, "loss_breakdown/pointer_loss": 0.481279194355011, "step": 610 }, { "epoch": 0.06322417814941811, "grad_norm": 6.676082098887911, "learning_rate": 3.154943934760449e-06, "loss": 0.3501, "step": 620 }, { "epoch": 0.06322417814941811, "loss_breakdown/lm_loss": 0.00020846285042352974, "loss_breakdown/pointer_loss": 0.6441099643707275, "step": 620 }, { "epoch": 0.06322417814941811, "loss_breakdown/lm_loss": 0.00018081064627040178, "loss_breakdown/pointer_loss": 0.6718973517417908, "step": 620 }, { "epoch": 0.06322417814941811, "loss_breakdown/lm_loss": 0.00020560160919558257, "loss_breakdown/pointer_loss": 0.5096335411071777, "step": 620 }, { "epoch": 0.06322417814941811, "loss_breakdown/lm_loss": 0.00021020097483415157, "loss_breakdown/pointer_loss": 0.31608739495277405, "step": 620 }, { "epoch": 0.06322417814941811, "loss_breakdown/lm_loss": 0.00022152501333039254, "loss_breakdown/pointer_loss": 0.739711582660675, "step": 620 }, { "epoch": 0.06322417814941811, "loss_breakdown/lm_loss": 0.00031909235985949636, "loss_breakdown/pointer_loss": 1.345531702041626, "step": 620 }, { "epoch": 0.06322417814941811, "loss_breakdown/lm_loss": 0.0002051188494078815, "loss_breakdown/pointer_loss": 0.8359969854354858, "step": 620 }, { "epoch": 0.06322417814941811, "loss_breakdown/lm_loss": 0.00027478247648105025, "loss_breakdown/pointer_loss": 1.4776899814605713, "step": 620 }, { "epoch": 0.06424392295827969, "grad_norm": 10.142816386428606, "learning_rate": 3.2059123343527017e-06, "loss": 0.3593, "step": 630 }, { "epoch": 0.06424392295827969, "loss_breakdown/lm_loss": 0.00031157638295553625, "loss_breakdown/pointer_loss": 1.8775359392166138, "step": 630 }, { "epoch": 0.06424392295827969, "loss_breakdown/lm_loss": 0.00021331121388357133, "loss_breakdown/pointer_loss": 1.451849341392517, "step": 630 }, { "epoch": 0.06424392295827969, "loss_breakdown/lm_loss": 0.00021977447613608092, "loss_breakdown/pointer_loss": 0.08919572830200195, "step": 630 }, { "epoch": 0.06424392295827969, "loss_breakdown/lm_loss": 0.0003696078492794186, "loss_breakdown/pointer_loss": 0.41943663358688354, "step": 630 }, { "epoch": 0.06424392295827969, "loss_breakdown/lm_loss": 0.00027765947743318975, "loss_breakdown/pointer_loss": 0.08128732442855835, "step": 630 }, { "epoch": 0.06424392295827969, "loss_breakdown/lm_loss": 0.00028361185104586184, "loss_breakdown/pointer_loss": 0.5675990581512451, "step": 630 }, { "epoch": 0.06424392295827969, "loss_breakdown/lm_loss": 0.00021130974346306175, "loss_breakdown/pointer_loss": 0.5671435594558716, "step": 630 }, { "epoch": 0.06424392295827969, "loss_breakdown/lm_loss": 0.0002941293059848249, "loss_breakdown/pointer_loss": 2.3658390045166016, "step": 630 }, { "epoch": 0.06526366776714128, "grad_norm": 6.391903421984558, "learning_rate": 3.256880733944954e-06, "loss": 0.3857, "step": 640 }, { "epoch": 0.06526366776714128, "loss_breakdown/lm_loss": 0.0001918130728881806, "loss_breakdown/pointer_loss": 0.26128503680229187, "step": 640 }, { "epoch": 0.06526366776714128, "loss_breakdown/lm_loss": 0.0001622526760911569, "loss_breakdown/pointer_loss": 0.41564077138900757, "step": 640 }, { "epoch": 0.06526366776714128, "loss_breakdown/lm_loss": 0.0001812374102883041, "loss_breakdown/pointer_loss": 3.161301374435425, "step": 640 }, { "epoch": 0.06526366776714128, "loss_breakdown/lm_loss": 0.00022198048827704042, "loss_breakdown/pointer_loss": 0.26816385984420776, "step": 640 }, { "epoch": 0.06526366776714128, "loss_breakdown/lm_loss": 0.00018897424160968512, "loss_breakdown/pointer_loss": 0.6607258319854736, "step": 640 }, { "epoch": 0.06526366776714128, "loss_breakdown/lm_loss": 0.0001651472703088075, "loss_breakdown/pointer_loss": 0.3590698838233948, "step": 640 }, { "epoch": 0.06526366776714128, "loss_breakdown/lm_loss": 0.00024720997316762805, "loss_breakdown/pointer_loss": 0.1360754519701004, "step": 640 }, { "epoch": 0.06526366776714128, "loss_breakdown/lm_loss": 0.00020267508807592094, "loss_breakdown/pointer_loss": 0.8966611623764038, "step": 640 }, { "epoch": 0.06628341257600286, "grad_norm": 6.822916507755878, "learning_rate": 3.307849133537207e-06, "loss": 0.3467, "step": 650 }, { "epoch": 0.06628341257600286, "loss_breakdown/lm_loss": 0.0005580800352618098, "loss_breakdown/pointer_loss": 2.9189815521240234, "step": 650 }, { "epoch": 0.06628341257600286, "loss_breakdown/lm_loss": 0.00031763510196469724, "loss_breakdown/pointer_loss": 0.8283815383911133, "step": 650 }, { "epoch": 0.06628341257600286, "loss_breakdown/lm_loss": 0.00044696935219690204, "loss_breakdown/pointer_loss": 0.7319267988204956, "step": 650 }, { "epoch": 0.06628341257600286, "loss_breakdown/lm_loss": 0.0003246051783207804, "loss_breakdown/pointer_loss": 1.074458122253418, "step": 650 }, { "epoch": 0.06628341257600286, "loss_breakdown/lm_loss": 0.0002523797156754881, "loss_breakdown/pointer_loss": 1.0864475965499878, "step": 650 }, { "epoch": 0.06628341257600286, "loss_breakdown/lm_loss": 0.0002711792185436934, "loss_breakdown/pointer_loss": 0.3857179284095764, "step": 650 }, { "epoch": 0.06628341257600286, "loss_breakdown/lm_loss": 0.00025916253798641264, "loss_breakdown/pointer_loss": 1.2029099464416504, "step": 650 }, { "epoch": 0.06628341257600286, "loss_breakdown/lm_loss": 0.00020560457778628916, "loss_breakdown/pointer_loss": 0.6435347199440002, "step": 650 }, { "epoch": 0.06730315738486443, "grad_norm": 4.948179861787148, "learning_rate": 3.35881753312946e-06, "loss": 0.3728, "step": 660 }, { "epoch": 0.06730315738486443, "loss_breakdown/lm_loss": 0.0002714928996283561, "loss_breakdown/pointer_loss": 0.5116109251976013, "step": 660 }, { "epoch": 0.06730315738486443, "loss_breakdown/lm_loss": 0.000533494574483484, "loss_breakdown/pointer_loss": 0.271908700466156, "step": 660 }, { "epoch": 0.06730315738486443, "loss_breakdown/lm_loss": 0.0004081411170773208, "loss_breakdown/pointer_loss": 0.4501716196537018, "step": 660 }, { "epoch": 0.06730315738486443, "loss_breakdown/lm_loss": 0.0005408760625869036, "loss_breakdown/pointer_loss": 0.320046603679657, "step": 660 }, { "epoch": 0.06730315738486443, "loss_breakdown/lm_loss": 0.0007606291328556836, "loss_breakdown/pointer_loss": 0.552178680896759, "step": 660 }, { "epoch": 0.06730315738486443, "loss_breakdown/lm_loss": 0.000574242090806365, "loss_breakdown/pointer_loss": 0.4251701831817627, "step": 660 }, { "epoch": 0.06730315738486443, "loss_breakdown/lm_loss": 0.0007922466611489654, "loss_breakdown/pointer_loss": 0.2689271569252014, "step": 660 }, { "epoch": 0.06730315738486443, "loss_breakdown/lm_loss": 0.0006132787093520164, "loss_breakdown/pointer_loss": 0.8110072612762451, "step": 660 }, { "epoch": 0.06832290219372603, "grad_norm": 3.606668801280549, "learning_rate": 3.4097859327217125e-06, "loss": 0.3953, "step": 670 }, { "epoch": 0.06832290219372603, "loss_breakdown/lm_loss": 0.00019670106121338904, "loss_breakdown/pointer_loss": 0.38881659507751465, "step": 670 }, { "epoch": 0.06832290219372603, "loss_breakdown/lm_loss": 0.00020887474238406867, "loss_breakdown/pointer_loss": 0.7707716822624207, "step": 670 }, { "epoch": 0.06832290219372603, "loss_breakdown/lm_loss": 0.00021915542311035097, "loss_breakdown/pointer_loss": 0.6900334358215332, "step": 670 }, { "epoch": 0.06832290219372603, "loss_breakdown/lm_loss": 0.00018971155805047601, "loss_breakdown/pointer_loss": 0.27753183245658875, "step": 670 }, { "epoch": 0.06832290219372603, "loss_breakdown/lm_loss": 0.0001627449382795021, "loss_breakdown/pointer_loss": 0.3758466839790344, "step": 670 }, { "epoch": 0.06832290219372603, "loss_breakdown/lm_loss": 0.00023089033493306488, "loss_breakdown/pointer_loss": 1.4866647720336914, "step": 670 }, { "epoch": 0.06832290219372603, "loss_breakdown/lm_loss": 0.00018282941891811788, "loss_breakdown/pointer_loss": 0.2035059630870819, "step": 670 }, { "epoch": 0.06832290219372603, "loss_breakdown/lm_loss": 0.00026285904459655285, "loss_breakdown/pointer_loss": 0.9200844168663025, "step": 670 }, { "epoch": 0.0693426470025876, "grad_norm": 9.450862795876587, "learning_rate": 3.460754332313966e-06, "loss": 0.3478, "step": 680 }, { "epoch": 0.0693426470025876, "loss_breakdown/lm_loss": 0.000230550387641415, "loss_breakdown/pointer_loss": 0.1606944054365158, "step": 680 }, { "epoch": 0.0693426470025876, "loss_breakdown/lm_loss": 0.00026668168720789254, "loss_breakdown/pointer_loss": 0.7015256881713867, "step": 680 }, { "epoch": 0.0693426470025876, "loss_breakdown/lm_loss": 0.0002812402381096035, "loss_breakdown/pointer_loss": 0.3043896555900574, "step": 680 }, { "epoch": 0.0693426470025876, "loss_breakdown/lm_loss": 0.0001692160003585741, "loss_breakdown/pointer_loss": 0.15385079383850098, "step": 680 }, { "epoch": 0.0693426470025876, "loss_breakdown/lm_loss": 0.000662437581922859, "loss_breakdown/pointer_loss": 0.45459622144699097, "step": 680 }, { "epoch": 0.0693426470025876, "loss_breakdown/lm_loss": 0.00019205718126613647, "loss_breakdown/pointer_loss": 0.6321746110916138, "step": 680 }, { "epoch": 0.0693426470025876, "loss_breakdown/lm_loss": 0.00024505623150616884, "loss_breakdown/pointer_loss": 0.14091186225414276, "step": 680 }, { "epoch": 0.0693426470025876, "loss_breakdown/lm_loss": 0.00035276456037536263, "loss_breakdown/pointer_loss": 0.3664900064468384, "step": 680 }, { "epoch": 0.07036239181144918, "grad_norm": 4.048764284527278, "learning_rate": 3.5117227319062187e-06, "loss": 0.354, "step": 690 }, { "epoch": 0.07036239181144918, "loss_breakdown/lm_loss": 0.00019962147052865475, "loss_breakdown/pointer_loss": 0.32643914222717285, "step": 690 }, { "epoch": 0.07036239181144918, "loss_breakdown/lm_loss": 0.00016628651064820588, "loss_breakdown/pointer_loss": 0.2687206268310547, "step": 690 }, { "epoch": 0.07036239181144918, "loss_breakdown/lm_loss": 0.0001809539826354012, "loss_breakdown/pointer_loss": 0.19997340440750122, "step": 690 }, { "epoch": 0.07036239181144918, "loss_breakdown/lm_loss": 0.00015848690236452967, "loss_breakdown/pointer_loss": 0.8651931881904602, "step": 690 }, { "epoch": 0.07036239181144918, "loss_breakdown/lm_loss": 0.00016805935592856258, "loss_breakdown/pointer_loss": 1.4053386449813843, "step": 690 }, { "epoch": 0.07036239181144918, "loss_breakdown/lm_loss": 0.00020165424211882055, "loss_breakdown/pointer_loss": 0.31557697057724, "step": 690 }, { "epoch": 0.07036239181144918, "loss_breakdown/lm_loss": 0.0001871253625722602, "loss_breakdown/pointer_loss": 0.8906360864639282, "step": 690 }, { "epoch": 0.07036239181144918, "loss_breakdown/lm_loss": 0.00020078041416127235, "loss_breakdown/pointer_loss": 1.407374382019043, "step": 690 }, { "epoch": 0.07138213662031077, "grad_norm": 10.68563101359134, "learning_rate": 3.562691131498471e-06, "loss": 0.3551, "step": 700 }, { "epoch": 0.07138213662031077, "loss_breakdown/lm_loss": 0.00048352472367696464, "loss_breakdown/pointer_loss": 1.1373260021209717, "step": 700 }, { "epoch": 0.07138213662031077, "loss_breakdown/lm_loss": 0.00023167439212556928, "loss_breakdown/pointer_loss": 1.2404718399047852, "step": 700 }, { "epoch": 0.07138213662031077, "loss_breakdown/lm_loss": 0.00023544191208202392, "loss_breakdown/pointer_loss": 0.3181227445602417, "step": 700 }, { "epoch": 0.07138213662031077, "loss_breakdown/lm_loss": 0.0002152880042558536, "loss_breakdown/pointer_loss": 0.7021924257278442, "step": 700 }, { "epoch": 0.07138213662031077, "loss_breakdown/lm_loss": 0.00020178667909931391, "loss_breakdown/pointer_loss": 0.9597254395484924, "step": 700 }, { "epoch": 0.07138213662031077, "loss_breakdown/lm_loss": 0.0003954946296289563, "loss_breakdown/pointer_loss": 2.4950156211853027, "step": 700 }, { "epoch": 0.07138213662031077, "loss_breakdown/lm_loss": 0.0001506689004600048, "loss_breakdown/pointer_loss": 0.6003177165985107, "step": 700 }, { "epoch": 0.07138213662031077, "loss_breakdown/lm_loss": 0.0002020570682361722, "loss_breakdown/pointer_loss": 1.6081061363220215, "step": 700 }, { "epoch": 0.07240188142917235, "grad_norm": 6.080751271216906, "learning_rate": 3.613659531090724e-06, "loss": 0.3688, "step": 710 }, { "epoch": 0.07240188142917235, "loss_breakdown/lm_loss": 0.0001557174837216735, "loss_breakdown/pointer_loss": 0.15376025438308716, "step": 710 }, { "epoch": 0.07240188142917235, "loss_breakdown/lm_loss": 0.0001351081591565162, "loss_breakdown/pointer_loss": 0.20017117261886597, "step": 710 }, { "epoch": 0.07240188142917235, "loss_breakdown/lm_loss": 0.00014558082330040634, "loss_breakdown/pointer_loss": 0.2596484422683716, "step": 710 }, { "epoch": 0.07240188142917235, "loss_breakdown/lm_loss": 0.00016474236326757818, "loss_breakdown/pointer_loss": 0.5022501349449158, "step": 710 }, { "epoch": 0.07240188142917235, "loss_breakdown/lm_loss": 0.00017658014257904142, "loss_breakdown/pointer_loss": 0.10176730155944824, "step": 710 }, { "epoch": 0.07240188142917235, "loss_breakdown/lm_loss": 0.00015972787514328957, "loss_breakdown/pointer_loss": 0.20973074436187744, "step": 710 }, { "epoch": 0.07240188142917235, "loss_breakdown/lm_loss": 0.00014069942699279636, "loss_breakdown/pointer_loss": 0.7174942493438721, "step": 710 }, { "epoch": 0.07240188142917235, "loss_breakdown/lm_loss": 0.0002265234652440995, "loss_breakdown/pointer_loss": 0.6831387877464294, "step": 710 }, { "epoch": 0.07342162623803393, "grad_norm": 3.623834307021345, "learning_rate": 3.664627930682977e-06, "loss": 0.373, "step": 720 }, { "epoch": 0.07342162623803393, "loss_breakdown/lm_loss": 0.00017042947001755238, "loss_breakdown/pointer_loss": 1.4082796573638916, "step": 720 }, { "epoch": 0.07342162623803393, "loss_breakdown/lm_loss": 0.0002291065320605412, "loss_breakdown/pointer_loss": 0.8629769086837769, "step": 720 }, { "epoch": 0.07342162623803393, "loss_breakdown/lm_loss": 0.00014923782146070153, "loss_breakdown/pointer_loss": 0.6419671773910522, "step": 720 }, { "epoch": 0.07342162623803393, "loss_breakdown/lm_loss": 0.00015440832066815346, "loss_breakdown/pointer_loss": 0.436161607503891, "step": 720 }, { "epoch": 0.07342162623803393, "loss_breakdown/lm_loss": 0.000309001246932894, "loss_breakdown/pointer_loss": 0.7805345058441162, "step": 720 }, { "epoch": 0.07342162623803393, "loss_breakdown/lm_loss": 0.0002447475562803447, "loss_breakdown/pointer_loss": 1.3109465837478638, "step": 720 }, { "epoch": 0.07342162623803393, "loss_breakdown/lm_loss": 0.00019714096561074257, "loss_breakdown/pointer_loss": 0.28904077410697937, "step": 720 }, { "epoch": 0.07342162623803393, "loss_breakdown/lm_loss": 0.0002609224757179618, "loss_breakdown/pointer_loss": 0.9458904266357422, "step": 720 }, { "epoch": 0.07444137104689551, "grad_norm": 33.65945020356024, "learning_rate": 3.7155963302752295e-06, "loss": 0.3372, "step": 730 }, { "epoch": 0.07444137104689551, "loss_breakdown/lm_loss": 0.00015160370094235986, "loss_breakdown/pointer_loss": 0.14174389839172363, "step": 730 }, { "epoch": 0.07444137104689551, "loss_breakdown/lm_loss": 0.00018342098337598145, "loss_breakdown/pointer_loss": 0.38044288754463196, "step": 730 }, { "epoch": 0.07444137104689551, "loss_breakdown/lm_loss": 0.00014719121099915355, "loss_breakdown/pointer_loss": 0.24199476838111877, "step": 730 }, { "epoch": 0.07444137104689551, "loss_breakdown/lm_loss": 0.000211002174182795, "loss_breakdown/pointer_loss": 3.015571355819702, "step": 730 }, { "epoch": 0.07444137104689551, "loss_breakdown/lm_loss": 0.00018066578195430338, "loss_breakdown/pointer_loss": 0.17748618125915527, "step": 730 }, { "epoch": 0.07444137104689551, "loss_breakdown/lm_loss": 0.00019883827189914882, "loss_breakdown/pointer_loss": 0.796475887298584, "step": 730 }, { "epoch": 0.07444137104689551, "loss_breakdown/lm_loss": 0.00022440215980168432, "loss_breakdown/pointer_loss": 2.426166534423828, "step": 730 }, { "epoch": 0.07444137104689551, "loss_breakdown/lm_loss": 0.00015954201808199286, "loss_breakdown/pointer_loss": 0.24452584981918335, "step": 730 }, { "epoch": 0.0754611158557571, "grad_norm": 4.266630195685752, "learning_rate": 3.7665647298674824e-06, "loss": 0.3719, "step": 740 }, { "epoch": 0.0754611158557571, "loss_breakdown/lm_loss": 0.0001501058432040736, "loss_breakdown/pointer_loss": 0.3372942805290222, "step": 740 }, { "epoch": 0.0754611158557571, "loss_breakdown/lm_loss": 0.00017329695401713252, "loss_breakdown/pointer_loss": 0.42122510075569153, "step": 740 }, { "epoch": 0.0754611158557571, "loss_breakdown/lm_loss": 0.00013785803457722068, "loss_breakdown/pointer_loss": 0.30709391832351685, "step": 740 }, { "epoch": 0.0754611158557571, "loss_breakdown/lm_loss": 0.00019506987882778049, "loss_breakdown/pointer_loss": 0.359128475189209, "step": 740 }, { "epoch": 0.0754611158557571, "loss_breakdown/lm_loss": 0.00014204729814082384, "loss_breakdown/pointer_loss": 0.1374058872461319, "step": 740 }, { "epoch": 0.0754611158557571, "loss_breakdown/lm_loss": 0.00014478938828688115, "loss_breakdown/pointer_loss": 0.5451089143753052, "step": 740 }, { "epoch": 0.0754611158557571, "loss_breakdown/lm_loss": 0.0001605314901098609, "loss_breakdown/pointer_loss": 0.56644606590271, "step": 740 }, { "epoch": 0.0754611158557571, "loss_breakdown/lm_loss": 0.00014101310807745904, "loss_breakdown/pointer_loss": 0.33869725465774536, "step": 740 }, { "epoch": 0.07648086066461868, "grad_norm": 9.601332349549645, "learning_rate": 3.817533129459735e-06, "loss": 0.3531, "step": 750 }, { "epoch": 0.07648086066461868, "loss_breakdown/lm_loss": 0.0005870793829672039, "loss_breakdown/pointer_loss": 2.4134771823883057, "step": 750 }, { "epoch": 0.07648086066461868, "loss_breakdown/lm_loss": 0.00019188073929399252, "loss_breakdown/pointer_loss": 0.8017383813858032, "step": 750 }, { "epoch": 0.07648086066461868, "loss_breakdown/lm_loss": 0.00017381933867000043, "loss_breakdown/pointer_loss": 0.7189490795135498, "step": 750 }, { "epoch": 0.07648086066461868, "loss_breakdown/lm_loss": 0.0001947477285284549, "loss_breakdown/pointer_loss": 0.5407094359397888, "step": 750 }, { "epoch": 0.07648086066461868, "loss_breakdown/lm_loss": 0.00018555374117568135, "loss_breakdown/pointer_loss": 0.464824914932251, "step": 750 }, { "epoch": 0.07648086066461868, "loss_breakdown/lm_loss": 0.00014590553473681211, "loss_breakdown/pointer_loss": 1.5076366662979126, "step": 750 }, { "epoch": 0.07648086066461868, "loss_breakdown/lm_loss": 0.0001320446317549795, "loss_breakdown/pointer_loss": 0.8318736553192139, "step": 750 }, { "epoch": 0.07648086066461868, "loss_breakdown/lm_loss": 0.00016603528638370335, "loss_breakdown/pointer_loss": 1.2850711345672607, "step": 750 }, { "epoch": 0.07750060547348026, "grad_norm": 5.34902786029776, "learning_rate": 3.868501529051988e-06, "loss": 0.3631, "step": 760 }, { "epoch": 0.07750060547348026, "loss_breakdown/lm_loss": 0.00013099393981974572, "loss_breakdown/pointer_loss": 0.23149535059928894, "step": 760 }, { "epoch": 0.07750060547348026, "loss_breakdown/lm_loss": 0.00022714829538017511, "loss_breakdown/pointer_loss": 0.4193202555179596, "step": 760 }, { "epoch": 0.07750060547348026, "loss_breakdown/lm_loss": 0.0001689708005869761, "loss_breakdown/pointer_loss": 0.8161982297897339, "step": 760 }, { "epoch": 0.07750060547348026, "loss_breakdown/lm_loss": 0.00021105704945512116, "loss_breakdown/pointer_loss": 0.5781418681144714, "step": 760 }, { "epoch": 0.07750060547348026, "loss_breakdown/lm_loss": 0.00019910914124920964, "loss_breakdown/pointer_loss": 0.3395395576953888, "step": 760 }, { "epoch": 0.07750060547348026, "loss_breakdown/lm_loss": 0.00019104314560536295, "loss_breakdown/pointer_loss": 0.24583007395267487, "step": 760 }, { "epoch": 0.07750060547348026, "loss_breakdown/lm_loss": 0.00023441588564310223, "loss_breakdown/pointer_loss": 0.23568759858608246, "step": 760 }, { "epoch": 0.07750060547348026, "loss_breakdown/lm_loss": 0.00039530170033685863, "loss_breakdown/pointer_loss": 0.6496769189834595, "step": 760 }, { "epoch": 0.07852035028234185, "grad_norm": 9.314016702976195, "learning_rate": 3.919469928644241e-06, "loss": 0.3812, "step": 770 }, { "epoch": 0.07852035028234185, "loss_breakdown/lm_loss": 0.00020559410040732473, "loss_breakdown/pointer_loss": 0.4278773069381714, "step": 770 }, { "epoch": 0.07852035028234185, "loss_breakdown/lm_loss": 0.00014799766358919442, "loss_breakdown/pointer_loss": 0.2862022817134857, "step": 770 }, { "epoch": 0.07852035028234185, "loss_breakdown/lm_loss": 0.00016416219295933843, "loss_breakdown/pointer_loss": 0.5050333738327026, "step": 770 }, { "epoch": 0.07852035028234185, "loss_breakdown/lm_loss": 0.0001673693477641791, "loss_breakdown/pointer_loss": 0.3651273250579834, "step": 770 }, { "epoch": 0.07852035028234185, "loss_breakdown/lm_loss": 0.00014573594671674073, "loss_breakdown/pointer_loss": 0.3894961178302765, "step": 770 }, { "epoch": 0.07852035028234185, "loss_breakdown/lm_loss": 0.00022553101007360965, "loss_breakdown/pointer_loss": 1.3075902462005615, "step": 770 }, { "epoch": 0.07852035028234185, "loss_breakdown/lm_loss": 0.00019590627925936133, "loss_breakdown/pointer_loss": 1.470076560974121, "step": 770 }, { "epoch": 0.07852035028234185, "loss_breakdown/lm_loss": 0.00016325421165674925, "loss_breakdown/pointer_loss": 0.5135935544967651, "step": 770 }, { "epoch": 0.07954009509120342, "grad_norm": 5.520733738232498, "learning_rate": 3.9704383282364936e-06, "loss": 0.3442, "step": 780 }, { "epoch": 0.07954009509120342, "loss_breakdown/lm_loss": 0.00016952260921243578, "loss_breakdown/pointer_loss": 0.21040621399879456, "step": 780 }, { "epoch": 0.07954009509120342, "loss_breakdown/lm_loss": 0.00013323839812073857, "loss_breakdown/pointer_loss": 0.32911479473114014, "step": 780 }, { "epoch": 0.07954009509120342, "loss_breakdown/lm_loss": 0.00021109652880113572, "loss_breakdown/pointer_loss": 0.570669412612915, "step": 780 }, { "epoch": 0.07954009509120342, "loss_breakdown/lm_loss": 0.00018966832431033254, "loss_breakdown/pointer_loss": 0.30051541328430176, "step": 780 }, { "epoch": 0.07954009509120342, "loss_breakdown/lm_loss": 0.0001537932694191113, "loss_breakdown/pointer_loss": 0.20144374668598175, "step": 780 }, { "epoch": 0.07954009509120342, "loss_breakdown/lm_loss": 0.00019728262850549072, "loss_breakdown/pointer_loss": 0.41422826051712036, "step": 780 }, { "epoch": 0.07954009509120342, "loss_breakdown/lm_loss": 0.00012879609130322933, "loss_breakdown/pointer_loss": 0.27816057205200195, "step": 780 }, { "epoch": 0.07954009509120342, "loss_breakdown/lm_loss": 0.00016324990428984165, "loss_breakdown/pointer_loss": 3.322690010070801, "step": 780 }, { "epoch": 0.080559839900065, "grad_norm": 5.1605726086878185, "learning_rate": 4.021406727828747e-06, "loss": 0.3941, "step": 790 }, { "epoch": 0.080559839900065, "loss_breakdown/lm_loss": 0.00011192619422217831, "loss_breakdown/pointer_loss": 0.5078670978546143, "step": 790 }, { "epoch": 0.080559839900065, "loss_breakdown/lm_loss": 0.00011047831503674388, "loss_breakdown/pointer_loss": 0.2817322015762329, "step": 790 }, { "epoch": 0.080559839900065, "loss_breakdown/lm_loss": 0.00010519141505938023, "loss_breakdown/pointer_loss": 0.49818554520606995, "step": 790 }, { "epoch": 0.080559839900065, "loss_breakdown/lm_loss": 9.298123768530786e-05, "loss_breakdown/pointer_loss": 0.3228886127471924, "step": 790 }, { "epoch": 0.080559839900065, "loss_breakdown/lm_loss": 0.00011210359662072733, "loss_breakdown/pointer_loss": 1.3545161485671997, "step": 790 }, { "epoch": 0.080559839900065, "loss_breakdown/lm_loss": 0.00010911091521847993, "loss_breakdown/pointer_loss": 0.2873498797416687, "step": 790 }, { "epoch": 0.080559839900065, "loss_breakdown/lm_loss": 0.00013293640222400427, "loss_breakdown/pointer_loss": 0.8020501136779785, "step": 790 }, { "epoch": 0.080559839900065, "loss_breakdown/lm_loss": 0.00010279960406478494, "loss_breakdown/pointer_loss": 0.5537177920341492, "step": 790 }, { "epoch": 0.0815795847089266, "grad_norm": 20.070326392468935, "learning_rate": 4.072375127420999e-06, "loss": 0.3811, "step": 800 }, { "epoch": 0.0815795847089266, "loss_breakdown/lm_loss": 0.0002152908855350688, "loss_breakdown/pointer_loss": 3.111757755279541, "step": 800 }, { "epoch": 0.0815795847089266, "loss_breakdown/lm_loss": 0.00015243155939970165, "loss_breakdown/pointer_loss": 0.5587488412857056, "step": 800 }, { "epoch": 0.0815795847089266, "loss_breakdown/lm_loss": 0.00012037485430482775, "loss_breakdown/pointer_loss": 0.941627562046051, "step": 800 }, { "epoch": 0.0815795847089266, "loss_breakdown/lm_loss": 0.00011964488658122718, "loss_breakdown/pointer_loss": 0.8646599650382996, "step": 800 }, { "epoch": 0.0815795847089266, "loss_breakdown/lm_loss": 0.00011801569780800492, "loss_breakdown/pointer_loss": 0.5479191541671753, "step": 800 }, { "epoch": 0.0815795847089266, "loss_breakdown/lm_loss": 0.00012353842612355947, "loss_breakdown/pointer_loss": 0.6307918429374695, "step": 800 }, { "epoch": 0.0815795847089266, "loss_breakdown/lm_loss": 0.0001106659765355289, "loss_breakdown/pointer_loss": 0.5917068123817444, "step": 800 }, { "epoch": 0.0815795847089266, "loss_breakdown/lm_loss": 0.00011224525951547548, "loss_breakdown/pointer_loss": 0.9125601649284363, "step": 800 }, { "epoch": 0.08259932951778817, "grad_norm": 4.679215675126441, "learning_rate": 4.123343527013252e-06, "loss": 0.3683, "step": 810 }, { "epoch": 0.08259932951778817, "loss_breakdown/lm_loss": 0.00011847328278236091, "loss_breakdown/pointer_loss": 1.047338843345642, "step": 810 }, { "epoch": 0.08259932951778817, "loss_breakdown/lm_loss": 0.0001461577194277197, "loss_breakdown/pointer_loss": 0.4564018249511719, "step": 810 }, { "epoch": 0.08259932951778817, "loss_breakdown/lm_loss": 0.0001300393050769344, "loss_breakdown/pointer_loss": 0.46132412552833557, "step": 810 }, { "epoch": 0.08259932951778817, "loss_breakdown/lm_loss": 0.00011718132736859843, "loss_breakdown/pointer_loss": 0.18277552723884583, "step": 810 }, { "epoch": 0.08259932951778817, "loss_breakdown/lm_loss": 0.00015338069351855665, "loss_breakdown/pointer_loss": 0.3538161516189575, "step": 810 }, { "epoch": 0.08259932951778817, "loss_breakdown/lm_loss": 0.00012464317842386663, "loss_breakdown/pointer_loss": 0.41125571727752686, "step": 810 }, { "epoch": 0.08259932951778817, "loss_breakdown/lm_loss": 0.00017689517699182034, "loss_breakdown/pointer_loss": 0.3231269121170044, "step": 810 }, { "epoch": 0.08259932951778817, "loss_breakdown/lm_loss": 0.00015147053636610508, "loss_breakdown/pointer_loss": 0.1866457462310791, "step": 810 }, { "epoch": 0.08361907432664975, "grad_norm": 6.819645429776091, "learning_rate": 4.174311926605505e-06, "loss": 0.3757, "step": 820 }, { "epoch": 0.08361907432664975, "loss_breakdown/lm_loss": 0.00010039441986009479, "loss_breakdown/pointer_loss": 0.5159969925880432, "step": 820 }, { "epoch": 0.08361907432664975, "loss_breakdown/lm_loss": 0.00015872064977884293, "loss_breakdown/pointer_loss": 1.2339386940002441, "step": 820 }, { "epoch": 0.08361907432664975, "loss_breakdown/lm_loss": 0.00010105270484928042, "loss_breakdown/pointer_loss": 0.7937461733818054, "step": 820 }, { "epoch": 0.08361907432664975, "loss_breakdown/lm_loss": 0.00013193523045629263, "loss_breakdown/pointer_loss": 0.5415487885475159, "step": 820 }, { "epoch": 0.08361907432664975, "loss_breakdown/lm_loss": 0.00015268872084561735, "loss_breakdown/pointer_loss": 0.8487644195556641, "step": 820 }, { "epoch": 0.08361907432664975, "loss_breakdown/lm_loss": 0.00011639175500022247, "loss_breakdown/pointer_loss": 0.7514753341674805, "step": 820 }, { "epoch": 0.08361907432664975, "loss_breakdown/lm_loss": 0.00011902389087481424, "loss_breakdown/pointer_loss": 0.2993989884853363, "step": 820 }, { "epoch": 0.08361907432664975, "loss_breakdown/lm_loss": 0.00012074568076059222, "loss_breakdown/pointer_loss": 1.0462919473648071, "step": 820 }, { "epoch": 0.08463881913551134, "grad_norm": 10.29366290438452, "learning_rate": 4.225280326197758e-06, "loss": 0.3535, "step": 830 }, { "epoch": 0.08463881913551134, "loss_breakdown/lm_loss": 0.00013993302127346396, "loss_breakdown/pointer_loss": 3.126352548599243, "step": 830 }, { "epoch": 0.08463881913551134, "loss_breakdown/lm_loss": 0.00022354451357387006, "loss_breakdown/pointer_loss": 0.3034546673297882, "step": 830 }, { "epoch": 0.08463881913551134, "loss_breakdown/lm_loss": 0.00014526287850458175, "loss_breakdown/pointer_loss": 0.7227669358253479, "step": 830 }, { "epoch": 0.08463881913551134, "loss_breakdown/lm_loss": 0.00014761695638298988, "loss_breakdown/pointer_loss": 0.16458746790885925, "step": 830 }, { "epoch": 0.08463881913551134, "loss_breakdown/lm_loss": 0.0001451423449907452, "loss_breakdown/pointer_loss": 0.20258226990699768, "step": 830 }, { "epoch": 0.08463881913551134, "loss_breakdown/lm_loss": 0.00013673982175532728, "loss_breakdown/pointer_loss": 0.1630719155073166, "step": 830 }, { "epoch": 0.08463881913551134, "loss_breakdown/lm_loss": 0.00014175019168760628, "loss_breakdown/pointer_loss": 0.3904906213283539, "step": 830 }, { "epoch": 0.08463881913551134, "loss_breakdown/lm_loss": 0.00026917908689938486, "loss_breakdown/pointer_loss": 0.38182011246681213, "step": 830 }, { "epoch": 0.08565856394437292, "grad_norm": 2.7963171456146005, "learning_rate": 4.27624872579001e-06, "loss": 0.3781, "step": 840 }, { "epoch": 0.08565856394437292, "loss_breakdown/lm_loss": 0.00012062585301464424, "loss_breakdown/pointer_loss": 0.9864314794540405, "step": 840 }, { "epoch": 0.08565856394437292, "loss_breakdown/lm_loss": 0.00012680329382419586, "loss_breakdown/pointer_loss": 0.6923990249633789, "step": 840 }, { "epoch": 0.08565856394437292, "loss_breakdown/lm_loss": 0.00011509686009958386, "loss_breakdown/pointer_loss": 0.643044114112854, "step": 840 }, { "epoch": 0.08565856394437292, "loss_breakdown/lm_loss": 0.00010205572471022606, "loss_breakdown/pointer_loss": 0.4806225895881653, "step": 840 }, { "epoch": 0.08565856394437292, "loss_breakdown/lm_loss": 0.00013470540579874068, "loss_breakdown/pointer_loss": 1.8648713827133179, "step": 840 }, { "epoch": 0.08565856394437292, "loss_breakdown/lm_loss": 0.00011274089774815366, "loss_breakdown/pointer_loss": 0.589189350605011, "step": 840 }, { "epoch": 0.08565856394437292, "loss_breakdown/lm_loss": 0.0001057403496815823, "loss_breakdown/pointer_loss": 0.47603917121887207, "step": 840 }, { "epoch": 0.08565856394437292, "loss_breakdown/lm_loss": 8.847400749800727e-05, "loss_breakdown/pointer_loss": 0.2818123400211334, "step": 840 }, { "epoch": 0.0866783087532345, "grad_norm": 89.84618884385809, "learning_rate": 4.3272171253822634e-06, "loss": 0.3522, "step": 850 }, { "epoch": 0.0866783087532345, "loss_breakdown/lm_loss": 0.00021802997798658907, "loss_breakdown/pointer_loss": 1.050041913986206, "step": 850 }, { "epoch": 0.0866783087532345, "loss_breakdown/lm_loss": 0.0001575113710714504, "loss_breakdown/pointer_loss": 0.6961953639984131, "step": 850 }, { "epoch": 0.0866783087532345, "loss_breakdown/lm_loss": 0.0001268048072233796, "loss_breakdown/pointer_loss": 0.9532806873321533, "step": 850 }, { "epoch": 0.0866783087532345, "loss_breakdown/lm_loss": 0.00013078474148642272, "loss_breakdown/pointer_loss": 0.8283473253250122, "step": 850 }, { "epoch": 0.0866783087532345, "loss_breakdown/lm_loss": 0.00011560124403331429, "loss_breakdown/pointer_loss": 0.8173699378967285, "step": 850 }, { "epoch": 0.0866783087532345, "loss_breakdown/lm_loss": 0.00010074793681269512, "loss_breakdown/pointer_loss": 0.6932021379470825, "step": 850 }, { "epoch": 0.0866783087532345, "loss_breakdown/lm_loss": 9.01102539501153e-05, "loss_breakdown/pointer_loss": 0.8086802959442139, "step": 850 }, { "epoch": 0.0866783087532345, "loss_breakdown/lm_loss": 9.017082629725337e-05, "loss_breakdown/pointer_loss": 0.5452665686607361, "step": 850 }, { "epoch": 0.08769805356209609, "grad_norm": 11.41539670660943, "learning_rate": 4.378185524974517e-06, "loss": 0.3355, "step": 860 }, { "epoch": 0.08769805356209609, "loss_breakdown/lm_loss": 8.732340938877314e-05, "loss_breakdown/pointer_loss": 1.3927440643310547, "step": 860 }, { "epoch": 0.08769805356209609, "loss_breakdown/lm_loss": 0.00010564357944531366, "loss_breakdown/pointer_loss": 1.551464319229126, "step": 860 }, { "epoch": 0.08769805356209609, "loss_breakdown/lm_loss": 8.967206667875871e-05, "loss_breakdown/pointer_loss": 0.10397534817457199, "step": 860 }, { "epoch": 0.08769805356209609, "loss_breakdown/lm_loss": 9.950960520654917e-05, "loss_breakdown/pointer_loss": 0.26496320962905884, "step": 860 }, { "epoch": 0.08769805356209609, "loss_breakdown/lm_loss": 0.0001584622368682176, "loss_breakdown/pointer_loss": 0.26720333099365234, "step": 860 }, { "epoch": 0.08769805356209609, "loss_breakdown/lm_loss": 0.00010614604980219156, "loss_breakdown/pointer_loss": 0.658532977104187, "step": 860 }, { "epoch": 0.08769805356209609, "loss_breakdown/lm_loss": 0.0001829685061238706, "loss_breakdown/pointer_loss": 0.7893489599227905, "step": 860 }, { "epoch": 0.08769805356209609, "loss_breakdown/lm_loss": 0.00024162838235497475, "loss_breakdown/pointer_loss": 0.5303725004196167, "step": 860 }, { "epoch": 0.08871779837095767, "grad_norm": 3.984696553397037, "learning_rate": 4.429153924566769e-06, "loss": 0.3814, "step": 870 }, { "epoch": 0.08871779837095767, "loss_breakdown/lm_loss": 8.678682206664234e-05, "loss_breakdown/pointer_loss": 0.5518207550048828, "step": 870 }, { "epoch": 0.08871779837095767, "loss_breakdown/lm_loss": 0.00012148779933340847, "loss_breakdown/pointer_loss": 0.8824748992919922, "step": 870 }, { "epoch": 0.08871779837095767, "loss_breakdown/lm_loss": 8.417798380833119e-05, "loss_breakdown/pointer_loss": 0.7961520552635193, "step": 870 }, { "epoch": 0.08871779837095767, "loss_breakdown/lm_loss": 0.00012411880015861243, "loss_breakdown/pointer_loss": 0.8314223289489746, "step": 870 }, { "epoch": 0.08871779837095767, "loss_breakdown/lm_loss": 9.149095421889797e-05, "loss_breakdown/pointer_loss": 0.4601778984069824, "step": 870 }, { "epoch": 0.08871779837095767, "loss_breakdown/lm_loss": 8.175632683560252e-05, "loss_breakdown/pointer_loss": 0.4585471451282501, "step": 870 }, { "epoch": 0.08871779837095767, "loss_breakdown/lm_loss": 0.00010266391473123804, "loss_breakdown/pointer_loss": 0.27752333879470825, "step": 870 }, { "epoch": 0.08871779837095767, "loss_breakdown/lm_loss": 9.974030399462208e-05, "loss_breakdown/pointer_loss": 0.39933720231056213, "step": 870 }, { "epoch": 0.08973754317981925, "grad_norm": 45.21246303583918, "learning_rate": 4.480122324159022e-06, "loss": 0.3407, "step": 880 }, { "epoch": 0.08973754317981925, "loss_breakdown/lm_loss": 0.0001072314043994993, "loss_breakdown/pointer_loss": 0.14679530262947083, "step": 880 }, { "epoch": 0.08973754317981925, "loss_breakdown/lm_loss": 0.00012319056259002537, "loss_breakdown/pointer_loss": 2.128692865371704, "step": 880 }, { "epoch": 0.08973754317981925, "loss_breakdown/lm_loss": 0.0001524040853837505, "loss_breakdown/pointer_loss": 0.5153058171272278, "step": 880 }, { "epoch": 0.08973754317981925, "loss_breakdown/lm_loss": 0.00012389413313940167, "loss_breakdown/pointer_loss": 0.6976501941680908, "step": 880 }, { "epoch": 0.08973754317981925, "loss_breakdown/lm_loss": 0.00014046525757294148, "loss_breakdown/pointer_loss": 5.481105327606201, "step": 880 }, { "epoch": 0.08973754317981925, "loss_breakdown/lm_loss": 0.00015905129839666188, "loss_breakdown/pointer_loss": 1.1916453838348389, "step": 880 }, { "epoch": 0.08973754317981925, "loss_breakdown/lm_loss": 0.00015524627815466374, "loss_breakdown/pointer_loss": 0.44213053584098816, "step": 880 }, { "epoch": 0.08973754317981925, "loss_breakdown/lm_loss": 0.00014709871902596205, "loss_breakdown/pointer_loss": 0.15359213948249817, "step": 880 }, { "epoch": 0.09075728798868084, "grad_norm": 3.1641199973810274, "learning_rate": 4.531090723751275e-06, "loss": 0.3675, "step": 890 }, { "epoch": 0.09075728798868084, "loss_breakdown/lm_loss": 0.00011310246918583289, "loss_breakdown/pointer_loss": 0.533359169960022, "step": 890 }, { "epoch": 0.09075728798868084, "loss_breakdown/lm_loss": 9.939719893736765e-05, "loss_breakdown/pointer_loss": 0.4938592314720154, "step": 890 }, { "epoch": 0.09075728798868084, "loss_breakdown/lm_loss": 0.00012970776879228652, "loss_breakdown/pointer_loss": 0.4201236963272095, "step": 890 }, { "epoch": 0.09075728798868084, "loss_breakdown/lm_loss": 0.00010145254782401025, "loss_breakdown/pointer_loss": 0.2993767261505127, "step": 890 }, { "epoch": 0.09075728798868084, "loss_breakdown/lm_loss": 0.00016944056551437825, "loss_breakdown/pointer_loss": 0.8203945755958557, "step": 890 }, { "epoch": 0.09075728798868084, "loss_breakdown/lm_loss": 8.579294808441773e-05, "loss_breakdown/pointer_loss": 0.8089872598648071, "step": 890 }, { "epoch": 0.09075728798868084, "loss_breakdown/lm_loss": 0.00010123570245923474, "loss_breakdown/pointer_loss": 0.27887892723083496, "step": 890 }, { "epoch": 0.09075728798868084, "loss_breakdown/lm_loss": 0.00012310805323068053, "loss_breakdown/pointer_loss": 0.5068991184234619, "step": 890 }, { "epoch": 0.09177703279754242, "grad_norm": 21.14584507177523, "learning_rate": 4.5820591233435275e-06, "loss": 0.3358, "step": 900 }, { "epoch": 0.09177703279754242, "loss_breakdown/lm_loss": 0.00014652617392130196, "loss_breakdown/pointer_loss": 1.689835786819458, "step": 900 }, { "epoch": 0.09177703279754242, "loss_breakdown/lm_loss": 0.000157690912601538, "loss_breakdown/pointer_loss": 0.8011429309844971, "step": 900 }, { "epoch": 0.09177703279754242, "loss_breakdown/lm_loss": 8.32416262710467e-05, "loss_breakdown/pointer_loss": 0.96182781457901, "step": 900 }, { "epoch": 0.09177703279754242, "loss_breakdown/lm_loss": 9.318923548562452e-05, "loss_breakdown/pointer_loss": 0.5590513348579407, "step": 900 }, { "epoch": 0.09177703279754242, "loss_breakdown/lm_loss": 8.815739420242608e-05, "loss_breakdown/pointer_loss": 1.447575330734253, "step": 900 }, { "epoch": 0.09177703279754242, "loss_breakdown/lm_loss": 8.749900734983385e-05, "loss_breakdown/pointer_loss": 0.9483151435852051, "step": 900 }, { "epoch": 0.09177703279754242, "loss_breakdown/lm_loss": 9.157420572591946e-05, "loss_breakdown/pointer_loss": 0.527229905128479, "step": 900 }, { "epoch": 0.09177703279754242, "loss_breakdown/lm_loss": 9.038472489919513e-05, "loss_breakdown/pointer_loss": 0.8712334632873535, "step": 900 }, { "epoch": 0.092796777606404, "grad_norm": 5.286302194330881, "learning_rate": 4.63302752293578e-06, "loss": 0.314, "step": 910 }, { "epoch": 0.092796777606404, "loss_breakdown/lm_loss": 0.00011856250785058364, "loss_breakdown/pointer_loss": 0.5726526379585266, "step": 910 }, { "epoch": 0.092796777606404, "loss_breakdown/lm_loss": 7.381044997600839e-05, "loss_breakdown/pointer_loss": 0.46892714500427246, "step": 910 }, { "epoch": 0.092796777606404, "loss_breakdown/lm_loss": 8.419672667514533e-05, "loss_breakdown/pointer_loss": 0.45473307371139526, "step": 910 }, { "epoch": 0.092796777606404, "loss_breakdown/lm_loss": 8.751373388804495e-05, "loss_breakdown/pointer_loss": 0.29001912474632263, "step": 910 }, { "epoch": 0.092796777606404, "loss_breakdown/lm_loss": 8.98848011274822e-05, "loss_breakdown/pointer_loss": 0.24608135223388672, "step": 910 }, { "epoch": 0.092796777606404, "loss_breakdown/lm_loss": 7.933827146189287e-05, "loss_breakdown/pointer_loss": 0.17771978676319122, "step": 910 }, { "epoch": 0.092796777606404, "loss_breakdown/lm_loss": 0.00013705427409149706, "loss_breakdown/pointer_loss": 0.6163972616195679, "step": 910 }, { "epoch": 0.092796777606404, "loss_breakdown/lm_loss": 7.370873936451972e-05, "loss_breakdown/pointer_loss": 0.09147913753986359, "step": 910 }, { "epoch": 0.09381652241526559, "grad_norm": 3.572218859668829, "learning_rate": 4.683995922528033e-06, "loss": 0.364, "step": 920 }, { "epoch": 0.09381652241526559, "loss_breakdown/lm_loss": 8.695053838891909e-05, "loss_breakdown/pointer_loss": 0.5941790342330933, "step": 920 }, { "epoch": 0.09381652241526559, "loss_breakdown/lm_loss": 7.343744073295966e-05, "loss_breakdown/pointer_loss": 0.566443145275116, "step": 920 }, { "epoch": 0.09381652241526559, "loss_breakdown/lm_loss": 6.7518325522542e-05, "loss_breakdown/pointer_loss": 0.29952478408813477, "step": 920 }, { "epoch": 0.09381652241526559, "loss_breakdown/lm_loss": 7.920237840153277e-05, "loss_breakdown/pointer_loss": 0.27286770939826965, "step": 920 }, { "epoch": 0.09381652241526559, "loss_breakdown/lm_loss": 6.973775452934206e-05, "loss_breakdown/pointer_loss": 0.6963658332824707, "step": 920 }, { "epoch": 0.09381652241526559, "loss_breakdown/lm_loss": 6.76495983498171e-05, "loss_breakdown/pointer_loss": 0.3188644051551819, "step": 920 }, { "epoch": 0.09381652241526559, "loss_breakdown/lm_loss": 7.601189281558618e-05, "loss_breakdown/pointer_loss": 1.0056931972503662, "step": 920 }, { "epoch": 0.09381652241526559, "loss_breakdown/lm_loss": 0.00012340197281446308, "loss_breakdown/pointer_loss": 1.2657102346420288, "step": 920 }, { "epoch": 0.09483626722412716, "grad_norm": 5.432494191058925, "learning_rate": 4.734964322120286e-06, "loss": 0.3332, "step": 930 }, { "epoch": 0.09483626722412716, "loss_breakdown/lm_loss": 7.42251068004407e-05, "loss_breakdown/pointer_loss": 0.09060388803482056, "step": 930 }, { "epoch": 0.09483626722412716, "loss_breakdown/lm_loss": 7.434521830873564e-05, "loss_breakdown/pointer_loss": 0.23578062653541565, "step": 930 }, { "epoch": 0.09483626722412716, "loss_breakdown/lm_loss": 8.391452138312161e-05, "loss_breakdown/pointer_loss": 0.18846260011196136, "step": 930 }, { "epoch": 0.09483626722412716, "loss_breakdown/lm_loss": 7.343928155023605e-05, "loss_breakdown/pointer_loss": 0.13174059987068176, "step": 930 }, { "epoch": 0.09483626722412716, "loss_breakdown/lm_loss": 8.761319622863084e-05, "loss_breakdown/pointer_loss": 0.2737557888031006, "step": 930 }, { "epoch": 0.09483626722412716, "loss_breakdown/lm_loss": 0.00012216708273626864, "loss_breakdown/pointer_loss": 5.542922496795654, "step": 930 }, { "epoch": 0.09483626722412716, "loss_breakdown/lm_loss": 0.00010532101441640407, "loss_breakdown/pointer_loss": 0.5409517288208008, "step": 930 }, { "epoch": 0.09483626722412716, "loss_breakdown/lm_loss": 0.00012986604997422546, "loss_breakdown/pointer_loss": 0.5065701007843018, "step": 930 }, { "epoch": 0.09585601203298874, "grad_norm": 5.571529814528112, "learning_rate": 4.785932721712538e-06, "loss": 0.3549, "step": 940 }, { "epoch": 0.09585601203298874, "loss_breakdown/lm_loss": 7.122568786144257e-05, "loss_breakdown/pointer_loss": 0.37470731139183044, "step": 940 }, { "epoch": 0.09585601203298874, "loss_breakdown/lm_loss": 6.836094689788297e-05, "loss_breakdown/pointer_loss": 0.7562990784645081, "step": 940 }, { "epoch": 0.09585601203298874, "loss_breakdown/lm_loss": 7.564476982224733e-05, "loss_breakdown/pointer_loss": 0.5918815732002258, "step": 940 }, { "epoch": 0.09585601203298874, "loss_breakdown/lm_loss": 6.708914588671178e-05, "loss_breakdown/pointer_loss": 0.5821884870529175, "step": 940 }, { "epoch": 0.09585601203298874, "loss_breakdown/lm_loss": 7.42887204978615e-05, "loss_breakdown/pointer_loss": 0.20315146446228027, "step": 940 }, { "epoch": 0.09585601203298874, "loss_breakdown/lm_loss": 6.715850031469017e-05, "loss_breakdown/pointer_loss": 0.18412992358207703, "step": 940 }, { "epoch": 0.09585601203298874, "loss_breakdown/lm_loss": 7.221927808132023e-05, "loss_breakdown/pointer_loss": 0.5609650611877441, "step": 940 }, { "epoch": 0.09585601203298874, "loss_breakdown/lm_loss": 7.754873513476923e-05, "loss_breakdown/pointer_loss": 0.4192306399345398, "step": 940 }, { "epoch": 0.09687575684185033, "grad_norm": 8.113020742426109, "learning_rate": 4.836901121304791e-06, "loss": 0.3231, "step": 950 }, { "epoch": 0.09687575684185033, "loss_breakdown/lm_loss": 0.00025251490296795964, "loss_breakdown/pointer_loss": 1.568216323852539, "step": 950 }, { "epoch": 0.09687575684185033, "loss_breakdown/lm_loss": 0.0001466748071834445, "loss_breakdown/pointer_loss": 0.7336281538009644, "step": 950 }, { "epoch": 0.09687575684185033, "loss_breakdown/lm_loss": 0.0001827787491492927, "loss_breakdown/pointer_loss": 0.7859505414962769, "step": 950 }, { "epoch": 0.09687575684185033, "loss_breakdown/lm_loss": 0.00014510119217447937, "loss_breakdown/pointer_loss": 0.5511218309402466, "step": 950 }, { "epoch": 0.09687575684185033, "loss_breakdown/lm_loss": 0.00011495943908812478, "loss_breakdown/pointer_loss": 0.8998233079910278, "step": 950 }, { "epoch": 0.09687575684185033, "loss_breakdown/lm_loss": 0.00014799270138610154, "loss_breakdown/pointer_loss": 0.7436541318893433, "step": 950 }, { "epoch": 0.09687575684185033, "loss_breakdown/lm_loss": 9.990818944061175e-05, "loss_breakdown/pointer_loss": 0.43273288011550903, "step": 950 }, { "epoch": 0.09687575684185033, "loss_breakdown/lm_loss": 0.00010931301221717149, "loss_breakdown/pointer_loss": 0.6729751825332642, "step": 950 }, { "epoch": 0.09789550165071191, "grad_norm": 4.969789391071234, "learning_rate": 4.887869520897044e-06, "loss": 0.3516, "step": 960 }, { "epoch": 0.09789550165071191, "loss_breakdown/lm_loss": 8.295613224618137e-05, "loss_breakdown/pointer_loss": 0.42054182291030884, "step": 960 }, { "epoch": 0.09789550165071191, "loss_breakdown/lm_loss": 9.617882460588589e-05, "loss_breakdown/pointer_loss": 0.5806325674057007, "step": 960 }, { "epoch": 0.09789550165071191, "loss_breakdown/lm_loss": 7.845288928365335e-05, "loss_breakdown/pointer_loss": 0.2935192286968231, "step": 960 }, { "epoch": 0.09789550165071191, "loss_breakdown/lm_loss": 8.01629139459692e-05, "loss_breakdown/pointer_loss": 1.7425898313522339, "step": 960 }, { "epoch": 0.09789550165071191, "loss_breakdown/lm_loss": 8.039038220886141e-05, "loss_breakdown/pointer_loss": 0.2310241162776947, "step": 960 }, { "epoch": 0.09789550165071191, "loss_breakdown/lm_loss": 8.71552256285213e-05, "loss_breakdown/pointer_loss": 0.5180397629737854, "step": 960 }, { "epoch": 0.09789550165071191, "loss_breakdown/lm_loss": 8.37955522001721e-05, "loss_breakdown/pointer_loss": 0.3021320402622223, "step": 960 }, { "epoch": 0.09789550165071191, "loss_breakdown/lm_loss": 9.225191024597734e-05, "loss_breakdown/pointer_loss": 0.2126062512397766, "step": 960 }, { "epoch": 0.09891524645957349, "grad_norm": 3.6218768731604203, "learning_rate": 4.9388379204892974e-06, "loss": 0.3798, "step": 970 }, { "epoch": 0.09891524645957349, "loss_breakdown/lm_loss": 8.607510244473815e-05, "loss_breakdown/pointer_loss": 0.3574126064777374, "step": 970 }, { "epoch": 0.09891524645957349, "loss_breakdown/lm_loss": 9.327779116574675e-05, "loss_breakdown/pointer_loss": 1.1434311866760254, "step": 970 }, { "epoch": 0.09891524645957349, "loss_breakdown/lm_loss": 0.0001275258109671995, "loss_breakdown/pointer_loss": 0.45858365297317505, "step": 970 }, { "epoch": 0.09891524645957349, "loss_breakdown/lm_loss": 8.177563722711056e-05, "loss_breakdown/pointer_loss": 0.727165699005127, "step": 970 }, { "epoch": 0.09891524645957349, "loss_breakdown/lm_loss": 8.213792170863599e-05, "loss_breakdown/pointer_loss": 0.4214639663696289, "step": 970 }, { "epoch": 0.09891524645957349, "loss_breakdown/lm_loss": 9.008503548102453e-05, "loss_breakdown/pointer_loss": 0.41610249876976013, "step": 970 }, { "epoch": 0.09891524645957349, "loss_breakdown/lm_loss": 7.673427899135277e-05, "loss_breakdown/pointer_loss": 0.5873970985412598, "step": 970 }, { "epoch": 0.09891524645957349, "loss_breakdown/lm_loss": 6.616325117647648e-05, "loss_breakdown/pointer_loss": 0.5244318246841431, "step": 970 }, { "epoch": 0.09993499126843508, "grad_norm": 6.480912489222791, "learning_rate": 4.98980632008155e-06, "loss": 0.3622, "step": 980 }, { "epoch": 0.09993499126843508, "loss_breakdown/lm_loss": 8.701109618414193e-05, "loss_breakdown/pointer_loss": 2.964244842529297, "step": 980 }, { "epoch": 0.09993499126843508, "loss_breakdown/lm_loss": 8.02539725555107e-05, "loss_breakdown/pointer_loss": 0.3269467353820801, "step": 980 }, { "epoch": 0.09993499126843508, "loss_breakdown/lm_loss": 7.590302993776277e-05, "loss_breakdown/pointer_loss": 0.6660904884338379, "step": 980 }, { "epoch": 0.09993499126843508, "loss_breakdown/lm_loss": 6.415136158466339e-05, "loss_breakdown/pointer_loss": 0.586029052734375, "step": 980 }, { "epoch": 0.09993499126843508, "loss_breakdown/lm_loss": 8.245385834015906e-05, "loss_breakdown/pointer_loss": 0.3860604763031006, "step": 980 }, { "epoch": 0.09993499126843508, "loss_breakdown/lm_loss": 7.814700074959546e-05, "loss_breakdown/pointer_loss": 0.3373510241508484, "step": 980 }, { "epoch": 0.09993499126843508, "loss_breakdown/lm_loss": 5.295612936606631e-05, "loss_breakdown/pointer_loss": 0.17832261323928833, "step": 980 }, { "epoch": 0.09993499126843508, "loss_breakdown/lm_loss": 0.0001110561061068438, "loss_breakdown/pointer_loss": 7.445046424865723, "step": 980 }, { "epoch": 0.10095473607729666, "grad_norm": 2.3167414628910916, "learning_rate": 4.9954674220963174e-06, "loss": 0.3549, "step": 990 }, { "epoch": 0.10095473607729666, "loss_breakdown/lm_loss": 7.209367322502658e-05, "loss_breakdown/pointer_loss": 0.7215653657913208, "step": 990 }, { "epoch": 0.10095473607729666, "loss_breakdown/lm_loss": 0.00012908343342132866, "loss_breakdown/pointer_loss": 1.0462665557861328, "step": 990 }, { "epoch": 0.10095473607729666, "loss_breakdown/lm_loss": 5.618955037789419e-05, "loss_breakdown/pointer_loss": 0.5119261145591736, "step": 990 }, { "epoch": 0.10095473607729666, "loss_breakdown/lm_loss": 6.622062937822193e-05, "loss_breakdown/pointer_loss": 0.7467916011810303, "step": 990 }, { "epoch": 0.10095473607729666, "loss_breakdown/lm_loss": 0.00012045745097566396, "loss_breakdown/pointer_loss": 0.4127165973186493, "step": 990 }, { "epoch": 0.10095473607729666, "loss_breakdown/lm_loss": 7.238012767629698e-05, "loss_breakdown/pointer_loss": 0.7844221591949463, "step": 990 }, { "epoch": 0.10095473607729666, "loss_breakdown/lm_loss": 5.775721729150973e-05, "loss_breakdown/pointer_loss": 0.9548245668411255, "step": 990 }, { "epoch": 0.10095473607729666, "loss_breakdown/lm_loss": 6.560573820024729e-05, "loss_breakdown/pointer_loss": 0.18832936882972717, "step": 990 }, { "epoch": 0.10197448088615824, "grad_norm": 7.386030214768305, "learning_rate": 4.989801699716714e-06, "loss": 0.3436, "step": 1000 }, { "epoch": 0.10197448088615824, "loss_breakdown/lm_loss": 0.00012547452934086323, "loss_breakdown/pointer_loss": 2.5568418502807617, "step": 1000 }, { "epoch": 0.10197448088615824, "loss_breakdown/lm_loss": 0.00015210943820420653, "loss_breakdown/pointer_loss": 0.8054060935974121, "step": 1000 }, { "epoch": 0.10197448088615824, "loss_breakdown/lm_loss": 7.593205373268574e-05, "loss_breakdown/pointer_loss": 0.7347294092178345, "step": 1000 }, { "epoch": 0.10197448088615824, "loss_breakdown/lm_loss": 8.885376155376434e-05, "loss_breakdown/pointer_loss": 0.4408726692199707, "step": 1000 }, { "epoch": 0.10197448088615824, "loss_breakdown/lm_loss": 0.00015120806347113103, "loss_breakdown/pointer_loss": 1.0571941137313843, "step": 1000 }, { "epoch": 0.10197448088615824, "loss_breakdown/lm_loss": 9.111179679166526e-05, "loss_breakdown/pointer_loss": 0.7364094257354736, "step": 1000 }, { "epoch": 0.10197448088615824, "loss_breakdown/lm_loss": 5.8281679230276495e-05, "loss_breakdown/pointer_loss": 0.21648091077804565, "step": 1000 }, { "epoch": 0.10197448088615824, "loss_breakdown/lm_loss": 5.9621917898766696e-05, "loss_breakdown/pointer_loss": 0.2728777527809143, "step": 1000 }, { "epoch": 0.10299422569501983, "grad_norm": 4.971648515797285, "learning_rate": 4.9841359773371104e-06, "loss": 0.3326, "step": 1010 }, { "epoch": 0.10299422569501983, "loss_breakdown/lm_loss": 6.883348396513611e-05, "loss_breakdown/pointer_loss": 0.8534906506538391, "step": 1010 }, { "epoch": 0.10299422569501983, "loss_breakdown/lm_loss": 6.683184619760141e-05, "loss_breakdown/pointer_loss": 0.17148077487945557, "step": 1010 }, { "epoch": 0.10299422569501983, "loss_breakdown/lm_loss": 7.92217324487865e-05, "loss_breakdown/pointer_loss": 0.2530561685562134, "step": 1010 }, { "epoch": 0.10299422569501983, "loss_breakdown/lm_loss": 9.131202386924997e-05, "loss_breakdown/pointer_loss": 2.231865406036377, "step": 1010 }, { "epoch": 0.10299422569501983, "loss_breakdown/lm_loss": 7.623832061653957e-05, "loss_breakdown/pointer_loss": 0.3506850004196167, "step": 1010 }, { "epoch": 0.10299422569501983, "loss_breakdown/lm_loss": 8.099189290078357e-05, "loss_breakdown/pointer_loss": 0.12046387791633606, "step": 1010 }, { "epoch": 0.10299422569501983, "loss_breakdown/lm_loss": 7.656407251488417e-05, "loss_breakdown/pointer_loss": 0.3980117440223694, "step": 1010 }, { "epoch": 0.10299422569501983, "loss_breakdown/lm_loss": 6.694909825455397e-05, "loss_breakdown/pointer_loss": 0.48040586709976196, "step": 1010 }, { "epoch": 0.1040139705038814, "grad_norm": 3.0574363398334525, "learning_rate": 4.978470254957508e-06, "loss": 0.3901, "step": 1020 }, { "epoch": 0.1040139705038814, "loss_breakdown/lm_loss": 6.339202809613198e-05, "loss_breakdown/pointer_loss": 0.666016161441803, "step": 1020 }, { "epoch": 0.1040139705038814, "loss_breakdown/lm_loss": 5.0662303692661226e-05, "loss_breakdown/pointer_loss": 0.6859936714172363, "step": 1020 }, { "epoch": 0.1040139705038814, "loss_breakdown/lm_loss": 5.692164631909691e-05, "loss_breakdown/pointer_loss": 0.40156736969947815, "step": 1020 }, { "epoch": 0.1040139705038814, "loss_breakdown/lm_loss": 5.4440250096376985e-05, "loss_breakdown/pointer_loss": 1.2619895935058594, "step": 1020 }, { "epoch": 0.1040139705038814, "loss_breakdown/lm_loss": 6.0876911447849125e-05, "loss_breakdown/pointer_loss": 1.30573570728302, "step": 1020 }, { "epoch": 0.1040139705038814, "loss_breakdown/lm_loss": 5.425698691396974e-05, "loss_breakdown/pointer_loss": 0.5839633345603943, "step": 1020 }, { "epoch": 0.1040139705038814, "loss_breakdown/lm_loss": 5.433929254650138e-05, "loss_breakdown/pointer_loss": 0.4955675005912781, "step": 1020 }, { "epoch": 0.1040139705038814, "loss_breakdown/lm_loss": 4.9042373575503007e-05, "loss_breakdown/pointer_loss": 0.30417537689208984, "step": 1020 }, { "epoch": 0.10503371531274298, "grad_norm": 5.669633583927759, "learning_rate": 4.972804532577904e-06, "loss": 0.3391, "step": 1030 }, { "epoch": 0.10503371531274298, "loss_breakdown/lm_loss": 9.894701361190528e-05, "loss_breakdown/pointer_loss": 0.49804574251174927, "step": 1030 }, { "epoch": 0.10503371531274298, "loss_breakdown/lm_loss": 7.600591197842732e-05, "loss_breakdown/pointer_loss": 2.805046558380127, "step": 1030 }, { "epoch": 0.10503371531274298, "loss_breakdown/lm_loss": 7.08990337443538e-05, "loss_breakdown/pointer_loss": 0.3199497163295746, "step": 1030 }, { "epoch": 0.10503371531274298, "loss_breakdown/lm_loss": 9.754604252520949e-05, "loss_breakdown/pointer_loss": 0.07464313507080078, "step": 1030 }, { "epoch": 0.10503371531274298, "loss_breakdown/lm_loss": 6.28203124506399e-05, "loss_breakdown/pointer_loss": 0.07084298133850098, "step": 1030 }, { "epoch": 0.10503371531274298, "loss_breakdown/lm_loss": 7.21087126294151e-05, "loss_breakdown/pointer_loss": 0.2948952913284302, "step": 1030 }, { "epoch": 0.10503371531274298, "loss_breakdown/lm_loss": 0.00018534045375417918, "loss_breakdown/pointer_loss": 1.3888258934020996, "step": 1030 }, { "epoch": 0.10503371531274298, "loss_breakdown/lm_loss": 6.81518649798818e-05, "loss_breakdown/pointer_loss": 0.42069947719573975, "step": 1030 }, { "epoch": 0.10605346012160456, "grad_norm": 3.4196002637766294, "learning_rate": 4.967138810198301e-06, "loss": 0.3676, "step": 1040 }, { "epoch": 0.10605346012160456, "loss_breakdown/lm_loss": 5.1239843742223457e-05, "loss_breakdown/pointer_loss": 0.31783396005630493, "step": 1040 }, { "epoch": 0.10605346012160456, "loss_breakdown/lm_loss": 9.266456618206576e-05, "loss_breakdown/pointer_loss": 1.776216745376587, "step": 1040 }, { "epoch": 0.10605346012160456, "loss_breakdown/lm_loss": 7.544967957073823e-05, "loss_breakdown/pointer_loss": 0.33851495385169983, "step": 1040 }, { "epoch": 0.10605346012160456, "loss_breakdown/lm_loss": 6.422096339520067e-05, "loss_breakdown/pointer_loss": 0.33467501401901245, "step": 1040 }, { "epoch": 0.10605346012160456, "loss_breakdown/lm_loss": 0.002170427469536662, "loss_breakdown/pointer_loss": 0.5537002086639404, "step": 1040 }, { "epoch": 0.10605346012160456, "loss_breakdown/lm_loss": 6.599519110750407e-05, "loss_breakdown/pointer_loss": 0.8839381337165833, "step": 1040 }, { "epoch": 0.10605346012160456, "loss_breakdown/lm_loss": 6.616456812480465e-05, "loss_breakdown/pointer_loss": 1.0742689371109009, "step": 1040 }, { "epoch": 0.10605346012160456, "loss_breakdown/lm_loss": 6.677168858004734e-05, "loss_breakdown/pointer_loss": 0.9258497357368469, "step": 1040 }, { "epoch": 0.10707320493046615, "grad_norm": 8.556773781407383, "learning_rate": 4.961473087818697e-06, "loss": 0.3512, "step": 1050 }, { "epoch": 0.10707320493046615, "loss_breakdown/lm_loss": 0.00026541322586126626, "loss_breakdown/pointer_loss": 2.8610568046569824, "step": 1050 }, { "epoch": 0.10707320493046615, "loss_breakdown/lm_loss": 0.0001437829341739416, "loss_breakdown/pointer_loss": 0.797265350818634, "step": 1050 }, { "epoch": 0.10707320493046615, "loss_breakdown/lm_loss": 0.00012499319564085454, "loss_breakdown/pointer_loss": 1.5225095748901367, "step": 1050 }, { "epoch": 0.10707320493046615, "loss_breakdown/lm_loss": 0.00015207863179966807, "loss_breakdown/pointer_loss": 0.47430628538131714, "step": 1050 }, { "epoch": 0.10707320493046615, "loss_breakdown/lm_loss": 0.0001254108501598239, "loss_breakdown/pointer_loss": 0.8289909958839417, "step": 1050 }, { "epoch": 0.10707320493046615, "loss_breakdown/lm_loss": 0.00013337678683456033, "loss_breakdown/pointer_loss": 0.5875329971313477, "step": 1050 }, { "epoch": 0.10707320493046615, "loss_breakdown/lm_loss": 0.0002613677061162889, "loss_breakdown/pointer_loss": 0.5955603122711182, "step": 1050 }, { "epoch": 0.10707320493046615, "loss_breakdown/lm_loss": 9.408862388227135e-05, "loss_breakdown/pointer_loss": 0.42429542541503906, "step": 1050 }, { "epoch": 0.10809294973932773, "grad_norm": 4.093384744384952, "learning_rate": 4.955807365439094e-06, "loss": 0.334, "step": 1060 }, { "epoch": 0.10809294973932773, "loss_breakdown/lm_loss": 6.46018233965151e-05, "loss_breakdown/pointer_loss": 1.0409680604934692, "step": 1060 }, { "epoch": 0.10809294973932773, "loss_breakdown/lm_loss": 7.767832721583545e-05, "loss_breakdown/pointer_loss": 0.17467668652534485, "step": 1060 }, { "epoch": 0.10809294973932773, "loss_breakdown/lm_loss": 6.148128159111366e-05, "loss_breakdown/pointer_loss": 0.9738969802856445, "step": 1060 }, { "epoch": 0.10809294973932773, "loss_breakdown/lm_loss": 7.264970190590248e-05, "loss_breakdown/pointer_loss": 0.2170221209526062, "step": 1060 }, { "epoch": 0.10809294973932773, "loss_breakdown/lm_loss": 7.778077269904315e-05, "loss_breakdown/pointer_loss": 0.30583202838897705, "step": 1060 }, { "epoch": 0.10809294973932773, "loss_breakdown/lm_loss": 0.0002236121508758515, "loss_breakdown/pointer_loss": 0.9534614086151123, "step": 1060 }, { "epoch": 0.10809294973932773, "loss_breakdown/lm_loss": 8.325207454618067e-05, "loss_breakdown/pointer_loss": 0.5416335463523865, "step": 1060 }, { "epoch": 0.10809294973932773, "loss_breakdown/lm_loss": 9.302600665250793e-05, "loss_breakdown/pointer_loss": 0.9630628824234009, "step": 1060 }, { "epoch": 0.10911269454818931, "grad_norm": 2.265377579590172, "learning_rate": 4.95014164305949e-06, "loss": 0.3597, "step": 1070 }, { "epoch": 0.10911269454818931, "loss_breakdown/lm_loss": 5.9030506236013025e-05, "loss_breakdown/pointer_loss": 0.6036400198936462, "step": 1070 }, { "epoch": 0.10911269454818931, "loss_breakdown/lm_loss": 8.065867586992681e-05, "loss_breakdown/pointer_loss": 1.503627896308899, "step": 1070 }, { "epoch": 0.10911269454818931, "loss_breakdown/lm_loss": 7.456859748344868e-05, "loss_breakdown/pointer_loss": 0.6415550708770752, "step": 1070 }, { "epoch": 0.10911269454818931, "loss_breakdown/lm_loss": 8.750168490223587e-05, "loss_breakdown/pointer_loss": 0.8796756267547607, "step": 1070 }, { "epoch": 0.10911269454818931, "loss_breakdown/lm_loss": 4.89523408759851e-05, "loss_breakdown/pointer_loss": 0.48653069138526917, "step": 1070 }, { "epoch": 0.10911269454818931, "loss_breakdown/lm_loss": 5.5789885664125904e-05, "loss_breakdown/pointer_loss": 0.1673905849456787, "step": 1070 }, { "epoch": 0.10911269454818931, "loss_breakdown/lm_loss": 6.186333484947681e-05, "loss_breakdown/pointer_loss": 0.33567506074905396, "step": 1070 }, { "epoch": 0.10911269454818931, "loss_breakdown/lm_loss": 5.569841232500039e-05, "loss_breakdown/pointer_loss": 0.5192152261734009, "step": 1070 }, { "epoch": 0.1101324393570509, "grad_norm": 7.7870853479971345, "learning_rate": 4.944475920679887e-06, "loss": 0.3539, "step": 1080 }, { "epoch": 0.1101324393570509, "loss_breakdown/lm_loss": 4.458073453861289e-05, "loss_breakdown/pointer_loss": 0.25750020146369934, "step": 1080 }, { "epoch": 0.1101324393570509, "loss_breakdown/lm_loss": 7.679956615902483e-05, "loss_breakdown/pointer_loss": 0.149507537484169, "step": 1080 }, { "epoch": 0.1101324393570509, "loss_breakdown/lm_loss": 9.701416274765506e-05, "loss_breakdown/pointer_loss": 0.19185152649879456, "step": 1080 }, { "epoch": 0.1101324393570509, "loss_breakdown/lm_loss": 5.883193443878554e-05, "loss_breakdown/pointer_loss": 0.11529441922903061, "step": 1080 }, { "epoch": 0.1101324393570509, "loss_breakdown/lm_loss": 7.291040674317628e-05, "loss_breakdown/pointer_loss": 0.12563657760620117, "step": 1080 }, { "epoch": 0.1101324393570509, "loss_breakdown/lm_loss": 6.678522186120972e-05, "loss_breakdown/pointer_loss": 0.2370985597372055, "step": 1080 }, { "epoch": 0.1101324393570509, "loss_breakdown/lm_loss": 5.039762254455127e-05, "loss_breakdown/pointer_loss": 0.09931632876396179, "step": 1080 }, { "epoch": 0.1101324393570509, "loss_breakdown/lm_loss": 0.00010684690641937777, "loss_breakdown/pointer_loss": 0.5357144474983215, "step": 1080 }, { "epoch": 0.11115218416591248, "grad_norm": 5.851189329000443, "learning_rate": 4.938810198300284e-06, "loss": 0.3541, "step": 1090 }, { "epoch": 0.11115218416591248, "loss_breakdown/lm_loss": 6.385201413650066e-05, "loss_breakdown/pointer_loss": 1.123061180114746, "step": 1090 }, { "epoch": 0.11115218416591248, "loss_breakdown/lm_loss": 6.69869696139358e-05, "loss_breakdown/pointer_loss": 0.47304394841194153, "step": 1090 }, { "epoch": 0.11115218416591248, "loss_breakdown/lm_loss": 6.840247806394473e-05, "loss_breakdown/pointer_loss": 1.6133215427398682, "step": 1090 }, { "epoch": 0.11115218416591248, "loss_breakdown/lm_loss": 7.092808664310724e-05, "loss_breakdown/pointer_loss": 0.22003039717674255, "step": 1090 }, { "epoch": 0.11115218416591248, "loss_breakdown/lm_loss": 5.1216527936048806e-05, "loss_breakdown/pointer_loss": 0.4225407838821411, "step": 1090 }, { "epoch": 0.11115218416591248, "loss_breakdown/lm_loss": 6.2033228459768e-05, "loss_breakdown/pointer_loss": 0.46161338686943054, "step": 1090 }, { "epoch": 0.11115218416591248, "loss_breakdown/lm_loss": 5.804405373055488e-05, "loss_breakdown/pointer_loss": 0.25574004650115967, "step": 1090 }, { "epoch": 0.11115218416591248, "loss_breakdown/lm_loss": 6.398212281055748e-05, "loss_breakdown/pointer_loss": 0.3589763045310974, "step": 1090 }, { "epoch": 0.11217192897477406, "grad_norm": 12.634850032749739, "learning_rate": 4.933144475920681e-06, "loss": 0.3559, "step": 1100 }, { "epoch": 0.11217192897477406, "loss_breakdown/lm_loss": 0.00025969379930756986, "loss_breakdown/pointer_loss": 1.1315743923187256, "step": 1100 }, { "epoch": 0.11217192897477406, "loss_breakdown/lm_loss": 0.00019590019655879587, "loss_breakdown/pointer_loss": 1.1513473987579346, "step": 1100 }, { "epoch": 0.11217192897477406, "loss_breakdown/lm_loss": 0.00016811526438686997, "loss_breakdown/pointer_loss": 0.7701449990272522, "step": 1100 }, { "epoch": 0.11217192897477406, "loss_breakdown/lm_loss": 0.00018028763588517904, "loss_breakdown/pointer_loss": 0.8790040016174316, "step": 1100 }, { "epoch": 0.11217192897477406, "loss_breakdown/lm_loss": 0.0001674155064392835, "loss_breakdown/pointer_loss": 1.0289822816848755, "step": 1100 }, { "epoch": 0.11217192897477406, "loss_breakdown/lm_loss": 0.0001405080547556281, "loss_breakdown/pointer_loss": 0.7590170502662659, "step": 1100 }, { "epoch": 0.11217192897477406, "loss_breakdown/lm_loss": 8.273775893030688e-05, "loss_breakdown/pointer_loss": 0.5172677040100098, "step": 1100 }, { "epoch": 0.11217192897477406, "loss_breakdown/lm_loss": 9.903734462568536e-05, "loss_breakdown/pointer_loss": 0.5129834413528442, "step": 1100 }, { "epoch": 0.11319167378363565, "grad_norm": 6.2382856834305445, "learning_rate": 4.927478753541076e-06, "loss": 0.3222, "step": 1110 }, { "epoch": 0.11319167378363565, "loss_breakdown/lm_loss": 7.096349145285785e-05, "loss_breakdown/pointer_loss": 0.543343186378479, "step": 1110 }, { "epoch": 0.11319167378363565, "loss_breakdown/lm_loss": 6.833750376245007e-05, "loss_breakdown/pointer_loss": 0.4469541013240814, "step": 1110 }, { "epoch": 0.11319167378363565, "loss_breakdown/lm_loss": 6.116292206570506e-05, "loss_breakdown/pointer_loss": 0.35019493103027344, "step": 1110 }, { "epoch": 0.11319167378363565, "loss_breakdown/lm_loss": 5.310041888151318e-05, "loss_breakdown/pointer_loss": 0.24541179835796356, "step": 1110 }, { "epoch": 0.11319167378363565, "loss_breakdown/lm_loss": 6.173154542921111e-05, "loss_breakdown/pointer_loss": 0.6465861201286316, "step": 1110 }, { "epoch": 0.11319167378363565, "loss_breakdown/lm_loss": 5.53983190911822e-05, "loss_breakdown/pointer_loss": 0.9288883209228516, "step": 1110 }, { "epoch": 0.11319167378363565, "loss_breakdown/lm_loss": 5.427854193840176e-05, "loss_breakdown/pointer_loss": 0.26938721537590027, "step": 1110 }, { "epoch": 0.11319167378363565, "loss_breakdown/lm_loss": 7.50130056985654e-05, "loss_breakdown/pointer_loss": 0.5084578394889832, "step": 1110 }, { "epoch": 0.11421141859249723, "grad_norm": 3.01268048364593, "learning_rate": 4.921813031161474e-06, "loss": 0.3528, "step": 1120 }, { "epoch": 0.11421141859249723, "loss_breakdown/lm_loss": 6.220669456524774e-05, "loss_breakdown/pointer_loss": 0.6657122373580933, "step": 1120 }, { "epoch": 0.11421141859249723, "loss_breakdown/lm_loss": 6.098807352827862e-05, "loss_breakdown/pointer_loss": 0.3115415871143341, "step": 1120 }, { "epoch": 0.11421141859249723, "loss_breakdown/lm_loss": 5.8466826885705814e-05, "loss_breakdown/pointer_loss": 1.0171432495117188, "step": 1120 }, { "epoch": 0.11421141859249723, "loss_breakdown/lm_loss": 7.24697092664428e-05, "loss_breakdown/pointer_loss": 0.31614169478416443, "step": 1120 }, { "epoch": 0.11421141859249723, "loss_breakdown/lm_loss": 6.181743810884655e-05, "loss_breakdown/pointer_loss": 1.1870777606964111, "step": 1120 }, { "epoch": 0.11421141859249723, "loss_breakdown/lm_loss": 6.527663936140016e-05, "loss_breakdown/pointer_loss": 0.7646321654319763, "step": 1120 }, { "epoch": 0.11421141859249723, "loss_breakdown/lm_loss": 0.00010220773401670158, "loss_breakdown/pointer_loss": 0.8632513284683228, "step": 1120 }, { "epoch": 0.11421141859249723, "loss_breakdown/lm_loss": 5.694616993423551e-05, "loss_breakdown/pointer_loss": 0.31663382053375244, "step": 1120 }, { "epoch": 0.1152311634013588, "grad_norm": 4.831197052998748, "learning_rate": 4.91614730878187e-06, "loss": 0.317, "step": 1130 }, { "epoch": 0.1152311634013588, "loss_breakdown/lm_loss": 6.128571112640202e-05, "loss_breakdown/pointer_loss": 0.3002685010433197, "step": 1130 }, { "epoch": 0.1152311634013588, "loss_breakdown/lm_loss": 4.780651579494588e-05, "loss_breakdown/pointer_loss": 0.3314734995365143, "step": 1130 }, { "epoch": 0.1152311634013588, "loss_breakdown/lm_loss": 7.930233550723642e-05, "loss_breakdown/pointer_loss": 0.21214671432971954, "step": 1130 }, { "epoch": 0.1152311634013588, "loss_breakdown/lm_loss": 0.00023126232554204762, "loss_breakdown/pointer_loss": 0.44068479537963867, "step": 1130 }, { "epoch": 0.1152311634013588, "loss_breakdown/lm_loss": 6.376890814863145e-05, "loss_breakdown/pointer_loss": 3.720257043838501, "step": 1130 }, { "epoch": 0.1152311634013588, "loss_breakdown/lm_loss": 5.4163137974683195e-05, "loss_breakdown/pointer_loss": 0.20517997443675995, "step": 1130 }, { "epoch": 0.1152311634013588, "loss_breakdown/lm_loss": 4.554207043838687e-05, "loss_breakdown/pointer_loss": 0.14354193210601807, "step": 1130 }, { "epoch": 0.1152311634013588, "loss_breakdown/lm_loss": 6.088606460252777e-05, "loss_breakdown/pointer_loss": 0.31580764055252075, "step": 1130 }, { "epoch": 0.1162509082102204, "grad_norm": 16.423489694358928, "learning_rate": 4.910481586402267e-06, "loss": 0.3502, "step": 1140 }, { "epoch": 0.1162509082102204, "loss_breakdown/lm_loss": 4.795931090484373e-05, "loss_breakdown/pointer_loss": 0.33230459690093994, "step": 1140 }, { "epoch": 0.1162509082102204, "loss_breakdown/lm_loss": 4.312381861382164e-05, "loss_breakdown/pointer_loss": 1.1868741512298584, "step": 1140 }, { "epoch": 0.1162509082102204, "loss_breakdown/lm_loss": 4.4709006033372134e-05, "loss_breakdown/pointer_loss": 0.12744012475013733, "step": 1140 }, { "epoch": 0.1162509082102204, "loss_breakdown/lm_loss": 4.353555414127186e-05, "loss_breakdown/pointer_loss": 0.1462998390197754, "step": 1140 }, { "epoch": 0.1162509082102204, "loss_breakdown/lm_loss": 4.7882866056170315e-05, "loss_breakdown/pointer_loss": 0.8524929285049438, "step": 1140 }, { "epoch": 0.1162509082102204, "loss_breakdown/lm_loss": 6.0095248045399785e-05, "loss_breakdown/pointer_loss": 0.2631150782108307, "step": 1140 }, { "epoch": 0.1162509082102204, "loss_breakdown/lm_loss": 4.081401493749581e-05, "loss_breakdown/pointer_loss": 0.16697871685028076, "step": 1140 }, { "epoch": 0.1162509082102204, "loss_breakdown/lm_loss": 4.806690049008466e-05, "loss_breakdown/pointer_loss": 0.33663827180862427, "step": 1140 }, { "epoch": 0.11727065301908197, "grad_norm": 17.002169701482938, "learning_rate": 4.904815864022663e-06, "loss": 0.3653, "step": 1150 }, { "epoch": 0.11727065301908197, "loss_breakdown/lm_loss": 9.485569171374664e-05, "loss_breakdown/pointer_loss": 1.63606595993042, "step": 1150 }, { "epoch": 0.11727065301908197, "loss_breakdown/lm_loss": 5.789510760223493e-05, "loss_breakdown/pointer_loss": 0.5479165315628052, "step": 1150 }, { "epoch": 0.11727065301908197, "loss_breakdown/lm_loss": 5.8605699450708926e-05, "loss_breakdown/pointer_loss": 1.0233573913574219, "step": 1150 }, { "epoch": 0.11727065301908197, "loss_breakdown/lm_loss": 4.526907650870271e-05, "loss_breakdown/pointer_loss": 0.5876334309577942, "step": 1150 }, { "epoch": 0.11727065301908197, "loss_breakdown/lm_loss": 5.1426915888441727e-05, "loss_breakdown/pointer_loss": 0.7370439767837524, "step": 1150 }, { "epoch": 0.11727065301908197, "loss_breakdown/lm_loss": 4.525818440015428e-05, "loss_breakdown/pointer_loss": 0.5591131448745728, "step": 1150 }, { "epoch": 0.11727065301908197, "loss_breakdown/lm_loss": 4.2824111005757004e-05, "loss_breakdown/pointer_loss": 0.40194106101989746, "step": 1150 }, { "epoch": 0.11727065301908197, "loss_breakdown/lm_loss": 5.228824738878757e-05, "loss_breakdown/pointer_loss": 0.33880800008773804, "step": 1150 }, { "epoch": 0.11829039782794355, "grad_norm": 5.2484834843464965, "learning_rate": 4.89915014164306e-06, "loss": 0.3386, "step": 1160 }, { "epoch": 0.11829039782794355, "loss_breakdown/lm_loss": 3.567730891518295e-05, "loss_breakdown/pointer_loss": 0.6542326807975769, "step": 1160 }, { "epoch": 0.11829039782794355, "loss_breakdown/lm_loss": 4.8386453272541985e-05, "loss_breakdown/pointer_loss": 0.47137996554374695, "step": 1160 }, { "epoch": 0.11829039782794355, "loss_breakdown/lm_loss": 3.8964568375376984e-05, "loss_breakdown/pointer_loss": 0.9158980846405029, "step": 1160 }, { "epoch": 0.11829039782794355, "loss_breakdown/lm_loss": 7.929006824269891e-05, "loss_breakdown/pointer_loss": 0.554114818572998, "step": 1160 }, { "epoch": 0.11829039782794355, "loss_breakdown/lm_loss": 8.508971222909167e-05, "loss_breakdown/pointer_loss": 2.1291005611419678, "step": 1160 }, { "epoch": 0.11829039782794355, "loss_breakdown/lm_loss": 6.074079647078179e-05, "loss_breakdown/pointer_loss": 0.21140924096107483, "step": 1160 }, { "epoch": 0.11829039782794355, "loss_breakdown/lm_loss": 5.6285280152224004e-05, "loss_breakdown/pointer_loss": 0.1874023973941803, "step": 1160 }, { "epoch": 0.11829039782794355, "loss_breakdown/lm_loss": 7.465555245289579e-05, "loss_breakdown/pointer_loss": 0.3465595841407776, "step": 1160 }, { "epoch": 0.11931014263680514, "grad_norm": 3.357254467512761, "learning_rate": 4.893484419263457e-06, "loss": 0.3543, "step": 1170 }, { "epoch": 0.11931014263680514, "loss_breakdown/lm_loss": 5.020496973884292e-05, "loss_breakdown/pointer_loss": 0.8082425594329834, "step": 1170 }, { "epoch": 0.11931014263680514, "loss_breakdown/lm_loss": 5.903896817471832e-05, "loss_breakdown/pointer_loss": 0.32963526248931885, "step": 1170 }, { "epoch": 0.11931014263680514, "loss_breakdown/lm_loss": 5.2729545132024214e-05, "loss_breakdown/pointer_loss": 0.2689076066017151, "step": 1170 }, { "epoch": 0.11931014263680514, "loss_breakdown/lm_loss": 3.5953147744294256e-05, "loss_breakdown/pointer_loss": 0.21264737844467163, "step": 1170 }, { "epoch": 0.11931014263680514, "loss_breakdown/lm_loss": 5.593629612121731e-05, "loss_breakdown/pointer_loss": 0.20740625262260437, "step": 1170 }, { "epoch": 0.11931014263680514, "loss_breakdown/lm_loss": 5.714045983040705e-05, "loss_breakdown/pointer_loss": 1.580740213394165, "step": 1170 }, { "epoch": 0.11931014263680514, "loss_breakdown/lm_loss": 5.532631621463224e-05, "loss_breakdown/pointer_loss": 2.0287656784057617, "step": 1170 }, { "epoch": 0.11931014263680514, "loss_breakdown/lm_loss": 5.323384903022088e-05, "loss_breakdown/pointer_loss": 0.6763225793838501, "step": 1170 }, { "epoch": 0.12032988744566672, "grad_norm": 8.240376728902191, "learning_rate": 4.8878186968838536e-06, "loss": 0.3263, "step": 1180 }, { "epoch": 0.12032988744566672, "loss_breakdown/lm_loss": 5.2827712352154776e-05, "loss_breakdown/pointer_loss": 0.27760687470436096, "step": 1180 }, { "epoch": 0.12032988744566672, "loss_breakdown/lm_loss": 0.00010483452933840454, "loss_breakdown/pointer_loss": 1.1160314083099365, "step": 1180 }, { "epoch": 0.12032988744566672, "loss_breakdown/lm_loss": 0.000101870195067022, "loss_breakdown/pointer_loss": 3.929870367050171, "step": 1180 }, { "epoch": 0.12032988744566672, "loss_breakdown/lm_loss": 5.8504552725935355e-05, "loss_breakdown/pointer_loss": 0.30257874727249146, "step": 1180 }, { "epoch": 0.12032988744566672, "loss_breakdown/lm_loss": 6.541951734106988e-05, "loss_breakdown/pointer_loss": 0.12881338596343994, "step": 1180 }, { "epoch": 0.12032988744566672, "loss_breakdown/lm_loss": 4.653568976209499e-05, "loss_breakdown/pointer_loss": 0.3197603225708008, "step": 1180 }, { "epoch": 0.12032988744566672, "loss_breakdown/lm_loss": 8.378196798730642e-05, "loss_breakdown/pointer_loss": 1.2243242263793945, "step": 1180 }, { "epoch": 0.12032988744566672, "loss_breakdown/lm_loss": 0.0005720834597013891, "loss_breakdown/pointer_loss": 0.16097304224967957, "step": 1180 }, { "epoch": 0.1213496322545283, "grad_norm": 4.379111744121688, "learning_rate": 4.882152974504249e-06, "loss": 0.3591, "step": 1190 }, { "epoch": 0.1213496322545283, "loss_breakdown/lm_loss": 4.872076533501968e-05, "loss_breakdown/pointer_loss": 0.6228724122047424, "step": 1190 }, { "epoch": 0.1213496322545283, "loss_breakdown/lm_loss": 3.8824306102469563e-05, "loss_breakdown/pointer_loss": 0.5351582765579224, "step": 1190 }, { "epoch": 0.1213496322545283, "loss_breakdown/lm_loss": 6.52461894787848e-05, "loss_breakdown/pointer_loss": 0.3438684046268463, "step": 1190 }, { "epoch": 0.1213496322545283, "loss_breakdown/lm_loss": 5.408500146586448e-05, "loss_breakdown/pointer_loss": 0.6833102107048035, "step": 1190 }, { "epoch": 0.1213496322545283, "loss_breakdown/lm_loss": 3.890313382726163e-05, "loss_breakdown/pointer_loss": 0.8105608820915222, "step": 1190 }, { "epoch": 0.1213496322545283, "loss_breakdown/lm_loss": 4.678660479839891e-05, "loss_breakdown/pointer_loss": 0.5011630058288574, "step": 1190 }, { "epoch": 0.1213496322545283, "loss_breakdown/lm_loss": 5.033651541452855e-05, "loss_breakdown/pointer_loss": 0.21051937341690063, "step": 1190 }, { "epoch": 0.1213496322545283, "loss_breakdown/lm_loss": 3.645358083304018e-05, "loss_breakdown/pointer_loss": 0.5364803075790405, "step": 1190 }, { "epoch": 0.12236937706338989, "grad_norm": 21.13144058497601, "learning_rate": 4.8764872521246466e-06, "loss": 0.3308, "step": 1200 }, { "epoch": 0.12236937706338989, "loss_breakdown/lm_loss": 9.288090950576589e-05, "loss_breakdown/pointer_loss": 1.5418094396591187, "step": 1200 }, { "epoch": 0.12236937706338989, "loss_breakdown/lm_loss": 6.516244320664555e-05, "loss_breakdown/pointer_loss": 1.399027943611145, "step": 1200 }, { "epoch": 0.12236937706338989, "loss_breakdown/lm_loss": 6.419694545911625e-05, "loss_breakdown/pointer_loss": 0.9327883720397949, "step": 1200 }, { "epoch": 0.12236937706338989, "loss_breakdown/lm_loss": 7.315987022593617e-05, "loss_breakdown/pointer_loss": 0.66483473777771, "step": 1200 }, { "epoch": 0.12236937706338989, "loss_breakdown/lm_loss": 6.064186163712293e-05, "loss_breakdown/pointer_loss": 0.34329718351364136, "step": 1200 }, { "epoch": 0.12236937706338989, "loss_breakdown/lm_loss": 4.42251875938382e-05, "loss_breakdown/pointer_loss": 0.4829660654067993, "step": 1200 }, { "epoch": 0.12236937706338989, "loss_breakdown/lm_loss": 7.245520100696012e-05, "loss_breakdown/pointer_loss": 0.9402650594711304, "step": 1200 }, { "epoch": 0.12236937706338989, "loss_breakdown/lm_loss": 5.6439614127157256e-05, "loss_breakdown/pointer_loss": 1.068197250366211, "step": 1200 }, { "epoch": 0.12338912187225147, "grad_norm": 2.6769077634171774, "learning_rate": 4.870821529745043e-06, "loss": 0.3222, "step": 1210 }, { "epoch": 0.12338912187225147, "loss_breakdown/lm_loss": 4.466694008442573e-05, "loss_breakdown/pointer_loss": 0.4044369161128998, "step": 1210 }, { "epoch": 0.12338912187225147, "loss_breakdown/lm_loss": 3.3873002394102514e-05, "loss_breakdown/pointer_loss": 0.22111007571220398, "step": 1210 }, { "epoch": 0.12338912187225147, "loss_breakdown/lm_loss": 3.762530832318589e-05, "loss_breakdown/pointer_loss": 0.25203022360801697, "step": 1210 }, { "epoch": 0.12338912187225147, "loss_breakdown/lm_loss": 3.743164415936917e-05, "loss_breakdown/pointer_loss": 0.22123849391937256, "step": 1210 }, { "epoch": 0.12338912187225147, "loss_breakdown/lm_loss": 4.701376747107133e-05, "loss_breakdown/pointer_loss": 0.685257613658905, "step": 1210 }, { "epoch": 0.12338912187225147, "loss_breakdown/lm_loss": 3.752451448235661e-05, "loss_breakdown/pointer_loss": 0.13523243367671967, "step": 1210 }, { "epoch": 0.12338912187225147, "loss_breakdown/lm_loss": 4.501517832977697e-05, "loss_breakdown/pointer_loss": 0.3780270516872406, "step": 1210 }, { "epoch": 0.12338912187225147, "loss_breakdown/lm_loss": 4.307910057832487e-05, "loss_breakdown/pointer_loss": 0.7700486183166504, "step": 1210 }, { "epoch": 0.12440886668111305, "grad_norm": 2.5087137209753965, "learning_rate": 4.8651558073654396e-06, "loss": 0.3688, "step": 1220 }, { "epoch": 0.12440886668111305, "loss_breakdown/lm_loss": 5.080112168798223e-05, "loss_breakdown/pointer_loss": 0.8176412582397461, "step": 1220 }, { "epoch": 0.12440886668111305, "loss_breakdown/lm_loss": 5.886257713427767e-05, "loss_breakdown/pointer_loss": 0.8143147230148315, "step": 1220 }, { "epoch": 0.12440886668111305, "loss_breakdown/lm_loss": 5.382932067732327e-05, "loss_breakdown/pointer_loss": 0.4194311797618866, "step": 1220 }, { "epoch": 0.12440886668111305, "loss_breakdown/lm_loss": 6.209700950421393e-05, "loss_breakdown/pointer_loss": 0.29035353660583496, "step": 1220 }, { "epoch": 0.12440886668111305, "loss_breakdown/lm_loss": 4.6378117986023426e-05, "loss_breakdown/pointer_loss": 0.3273233473300934, "step": 1220 }, { "epoch": 0.12440886668111305, "loss_breakdown/lm_loss": 4.3926065700361505e-05, "loss_breakdown/pointer_loss": 0.8791869878768921, "step": 1220 }, { "epoch": 0.12440886668111305, "loss_breakdown/lm_loss": 5.301902274368331e-05, "loss_breakdown/pointer_loss": 0.5924619436264038, "step": 1220 }, { "epoch": 0.12440886668111305, "loss_breakdown/lm_loss": 3.8267702620942146e-05, "loss_breakdown/pointer_loss": 0.421628475189209, "step": 1220 }, { "epoch": 0.12542861148997464, "grad_norm": 7.999138319272629, "learning_rate": 4.859490084985836e-06, "loss": 0.3186, "step": 1230 }, { "epoch": 0.12542861148997464, "loss_breakdown/lm_loss": 4.040098428959027e-05, "loss_breakdown/pointer_loss": 0.15581414103507996, "step": 1230 }, { "epoch": 0.12542861148997464, "loss_breakdown/lm_loss": 4.5272034185472876e-05, "loss_breakdown/pointer_loss": 0.09206865727901459, "step": 1230 }, { "epoch": 0.12542861148997464, "loss_breakdown/lm_loss": 4.722270023194142e-05, "loss_breakdown/pointer_loss": 0.3895464539527893, "step": 1230 }, { "epoch": 0.12542861148997464, "loss_breakdown/lm_loss": 4.298756175558083e-05, "loss_breakdown/pointer_loss": 0.12838177382946014, "step": 1230 }, { "epoch": 0.12542861148997464, "loss_breakdown/lm_loss": 6.573044811375439e-05, "loss_breakdown/pointer_loss": 1.485856533050537, "step": 1230 }, { "epoch": 0.12542861148997464, "loss_breakdown/lm_loss": 6.0710939578711987e-05, "loss_breakdown/pointer_loss": 0.10795235633850098, "step": 1230 }, { "epoch": 0.12542861148997464, "loss_breakdown/lm_loss": 3.965425639762543e-05, "loss_breakdown/pointer_loss": 3.4784226417541504, "step": 1230 }, { "epoch": 0.12542861148997464, "loss_breakdown/lm_loss": 7.086503319442272e-05, "loss_breakdown/pointer_loss": 1.8617132902145386, "step": 1230 }, { "epoch": 0.12644835629883622, "grad_norm": 3.334886010312239, "learning_rate": 4.8538243626062326e-06, "loss": 0.353, "step": 1240 }, { "epoch": 0.12644835629883622, "loss_breakdown/lm_loss": 0.0001050210339599289, "loss_breakdown/pointer_loss": 1.091191291809082, "step": 1240 }, { "epoch": 0.12644835629883622, "loss_breakdown/lm_loss": 6.0951551859034225e-05, "loss_breakdown/pointer_loss": 0.15631043910980225, "step": 1240 }, { "epoch": 0.12644835629883622, "loss_breakdown/lm_loss": 3.957431181333959e-05, "loss_breakdown/pointer_loss": 0.719907820224762, "step": 1240 }, { "epoch": 0.12644835629883622, "loss_breakdown/lm_loss": 4.52462954854127e-05, "loss_breakdown/pointer_loss": 0.23405170440673828, "step": 1240 }, { "epoch": 0.12644835629883622, "loss_breakdown/lm_loss": 3.6105422623222694e-05, "loss_breakdown/pointer_loss": 0.4217698276042938, "step": 1240 }, { "epoch": 0.12644835629883622, "loss_breakdown/lm_loss": 3.9387574361171573e-05, "loss_breakdown/pointer_loss": 0.38406673073768616, "step": 1240 }, { "epoch": 0.12644835629883622, "loss_breakdown/lm_loss": 7.003123755566776e-05, "loss_breakdown/pointer_loss": 0.43307411670684814, "step": 1240 }, { "epoch": 0.12644835629883622, "loss_breakdown/lm_loss": 5.598608549917117e-05, "loss_breakdown/pointer_loss": 0.49155721068382263, "step": 1240 }, { "epoch": 0.1274681011076978, "grad_norm": 10.090798497344036, "learning_rate": 4.84815864022663e-06, "loss": 0.3261, "step": 1250 }, { "epoch": 0.1274681011076978, "loss_breakdown/lm_loss": 0.0001221006241394207, "loss_breakdown/pointer_loss": 2.266773223876953, "step": 1250 }, { "epoch": 0.1274681011076978, "loss_breakdown/lm_loss": 5.9728597989305854e-05, "loss_breakdown/pointer_loss": 1.4760427474975586, "step": 1250 }, { "epoch": 0.1274681011076978, "loss_breakdown/lm_loss": 4.9606180255068466e-05, "loss_breakdown/pointer_loss": 0.46353843808174133, "step": 1250 }, { "epoch": 0.1274681011076978, "loss_breakdown/lm_loss": 5.180407970328815e-05, "loss_breakdown/pointer_loss": 0.788792073726654, "step": 1250 }, { "epoch": 0.1274681011076978, "loss_breakdown/lm_loss": 5.227127985563129e-05, "loss_breakdown/pointer_loss": 0.6328577399253845, "step": 1250 }, { "epoch": 0.1274681011076978, "loss_breakdown/lm_loss": 3.925437340512872e-05, "loss_breakdown/pointer_loss": 0.2680914103984833, "step": 1250 }, { "epoch": 0.1274681011076978, "loss_breakdown/lm_loss": 3.5783676139544696e-05, "loss_breakdown/pointer_loss": 0.44660553336143494, "step": 1250 }, { "epoch": 0.1274681011076978, "loss_breakdown/lm_loss": 4.6290369937196374e-05, "loss_breakdown/pointer_loss": 0.2905454933643341, "step": 1250 }, { "epoch": 0.12848784591655937, "grad_norm": 4.233144299036986, "learning_rate": 4.842492917847026e-06, "loss": 0.3252, "step": 1260 }, { "epoch": 0.12848784591655937, "loss_breakdown/lm_loss": 3.288726293249056e-05, "loss_breakdown/pointer_loss": 0.3301885724067688, "step": 1260 }, { "epoch": 0.12848784591655937, "loss_breakdown/lm_loss": 4.36216541856993e-05, "loss_breakdown/pointer_loss": 0.18525707721710205, "step": 1260 }, { "epoch": 0.12848784591655937, "loss_breakdown/lm_loss": 2.7614039936452173e-05, "loss_breakdown/pointer_loss": 0.19376611709594727, "step": 1260 }, { "epoch": 0.12848784591655937, "loss_breakdown/lm_loss": 3.126599040115252e-05, "loss_breakdown/pointer_loss": 0.2832622528076172, "step": 1260 }, { "epoch": 0.12848784591655937, "loss_breakdown/lm_loss": 3.7294288631528616e-05, "loss_breakdown/pointer_loss": 0.28075820207595825, "step": 1260 }, { "epoch": 0.12848784591655937, "loss_breakdown/lm_loss": 3.786235174629837e-05, "loss_breakdown/pointer_loss": 0.2728618383407593, "step": 1260 }, { "epoch": 0.12848784591655937, "loss_breakdown/lm_loss": 3.407185067771934e-05, "loss_breakdown/pointer_loss": 0.28623896837234497, "step": 1260 }, { "epoch": 0.12848784591655937, "loss_breakdown/lm_loss": 4.34660141763743e-05, "loss_breakdown/pointer_loss": 0.430093914270401, "step": 1260 }, { "epoch": 0.12950759072542095, "grad_norm": 4.122616867120256, "learning_rate": 4.836827195467422e-06, "loss": 0.3652, "step": 1270 }, { "epoch": 0.12950759072542095, "loss_breakdown/lm_loss": 4.5196382416179404e-05, "loss_breakdown/pointer_loss": 0.3865891098976135, "step": 1270 }, { "epoch": 0.12950759072542095, "loss_breakdown/lm_loss": 5.325528763933107e-05, "loss_breakdown/pointer_loss": 0.46190139651298523, "step": 1270 }, { "epoch": 0.12950759072542095, "loss_breakdown/lm_loss": 3.4559521736809984e-05, "loss_breakdown/pointer_loss": 0.34764671325683594, "step": 1270 }, { "epoch": 0.12950759072542095, "loss_breakdown/lm_loss": 5.738722757087089e-05, "loss_breakdown/pointer_loss": 0.24231795966625214, "step": 1270 }, { "epoch": 0.12950759072542095, "loss_breakdown/lm_loss": 9.666486585047096e-05, "loss_breakdown/pointer_loss": 0.7210824489593506, "step": 1270 }, { "epoch": 0.12950759072542095, "loss_breakdown/lm_loss": 5.139112545293756e-05, "loss_breakdown/pointer_loss": 0.24619123339653015, "step": 1270 }, { "epoch": 0.12950759072542095, "loss_breakdown/lm_loss": 4.5416614739224315e-05, "loss_breakdown/pointer_loss": 0.9312030673027039, "step": 1270 }, { "epoch": 0.12950759072542095, "loss_breakdown/lm_loss": 5.887152292416431e-05, "loss_breakdown/pointer_loss": 0.9074158668518066, "step": 1270 }, { "epoch": 0.13052733553428256, "grad_norm": 13.405801976885163, "learning_rate": 4.8311614730878194e-06, "loss": 0.3428, "step": 1280 }, { "epoch": 0.13052733553428256, "loss_breakdown/lm_loss": 3.524793646647595e-05, "loss_breakdown/pointer_loss": 0.1990102231502533, "step": 1280 }, { "epoch": 0.13052733553428256, "loss_breakdown/lm_loss": 4.913319571642205e-05, "loss_breakdown/pointer_loss": 0.15336352586746216, "step": 1280 }, { "epoch": 0.13052733553428256, "loss_breakdown/lm_loss": 3.566134546417743e-05, "loss_breakdown/pointer_loss": 0.09553602337837219, "step": 1280 }, { "epoch": 0.13052733553428256, "loss_breakdown/lm_loss": 6.529299571411684e-05, "loss_breakdown/pointer_loss": 1.788570523262024, "step": 1280 }, { "epoch": 0.13052733553428256, "loss_breakdown/lm_loss": 6.795556691940874e-05, "loss_breakdown/pointer_loss": 0.2531183958053589, "step": 1280 }, { "epoch": 0.13052733553428256, "loss_breakdown/lm_loss": 7.811388786649331e-05, "loss_breakdown/pointer_loss": 0.10782638192176819, "step": 1280 }, { "epoch": 0.13052733553428256, "loss_breakdown/lm_loss": 6.339385436149314e-05, "loss_breakdown/pointer_loss": 0.3275728225708008, "step": 1280 }, { "epoch": 0.13052733553428256, "loss_breakdown/lm_loss": 0.00010977876809192821, "loss_breakdown/pointer_loss": 0.39838385581970215, "step": 1280 }, { "epoch": 0.13154708034314413, "grad_norm": 7.029044336963556, "learning_rate": 4.825495750708216e-06, "loss": 0.3684, "step": 1290 }, { "epoch": 0.13154708034314413, "loss_breakdown/lm_loss": 4.027675822726451e-05, "loss_breakdown/pointer_loss": 0.3691464960575104, "step": 1290 }, { "epoch": 0.13154708034314413, "loss_breakdown/lm_loss": 3.6571480450220406e-05, "loss_breakdown/pointer_loss": 0.7123551368713379, "step": 1290 }, { "epoch": 0.13154708034314413, "loss_breakdown/lm_loss": 3.7366578908404335e-05, "loss_breakdown/pointer_loss": 0.15891732275485992, "step": 1290 }, { "epoch": 0.13154708034314413, "loss_breakdown/lm_loss": 3.5389704862609506e-05, "loss_breakdown/pointer_loss": 0.1865575909614563, "step": 1290 }, { "epoch": 0.13154708034314413, "loss_breakdown/lm_loss": 5.4240194003796205e-05, "loss_breakdown/pointer_loss": 0.8707544803619385, "step": 1290 }, { "epoch": 0.13154708034314413, "loss_breakdown/lm_loss": 4.282146619516425e-05, "loss_breakdown/pointer_loss": 0.9759706258773804, "step": 1290 }, { "epoch": 0.13154708034314413, "loss_breakdown/lm_loss": 3.661325536086224e-05, "loss_breakdown/pointer_loss": 0.7287992238998413, "step": 1290 }, { "epoch": 0.13154708034314413, "loss_breakdown/lm_loss": 4.2212283005937934e-05, "loss_breakdown/pointer_loss": 0.36841052770614624, "step": 1290 }, { "epoch": 0.1325668251520057, "grad_norm": 3.969851254229787, "learning_rate": 4.8198300283286124e-06, "loss": 0.3125, "step": 1300 }, { "epoch": 0.1325668251520057, "loss_breakdown/lm_loss": 9.071028762264177e-05, "loss_breakdown/pointer_loss": 1.3970850706100464, "step": 1300 }, { "epoch": 0.1325668251520057, "loss_breakdown/lm_loss": 6.0987778851995245e-05, "loss_breakdown/pointer_loss": 0.450924813747406, "step": 1300 }, { "epoch": 0.1325668251520057, "loss_breakdown/lm_loss": 5.244902058620937e-05, "loss_breakdown/pointer_loss": 1.895905613899231, "step": 1300 }, { "epoch": 0.1325668251520057, "loss_breakdown/lm_loss": 4.49059261882212e-05, "loss_breakdown/pointer_loss": 0.5311031341552734, "step": 1300 }, { "epoch": 0.1325668251520057, "loss_breakdown/lm_loss": 4.237536631990224e-05, "loss_breakdown/pointer_loss": 0.8932229280471802, "step": 1300 }, { "epoch": 0.1325668251520057, "loss_breakdown/lm_loss": 6.369422771967947e-05, "loss_breakdown/pointer_loss": 0.9880218505859375, "step": 1300 }, { "epoch": 0.1325668251520057, "loss_breakdown/lm_loss": 4.2124993342440575e-05, "loss_breakdown/pointer_loss": 1.035866379737854, "step": 1300 }, { "epoch": 0.1325668251520057, "loss_breakdown/lm_loss": 3.6689867556560785e-05, "loss_breakdown/pointer_loss": 0.4939173460006714, "step": 1300 }, { "epoch": 0.1335865699608673, "grad_norm": 2.59589763308398, "learning_rate": 4.814164305949009e-06, "loss": 0.318, "step": 1310 }, { "epoch": 0.1335865699608673, "loss_breakdown/lm_loss": 3.313041088404134e-05, "loss_breakdown/pointer_loss": 1.867536187171936, "step": 1310 }, { "epoch": 0.1335865699608673, "loss_breakdown/lm_loss": 3.483323598629795e-05, "loss_breakdown/pointer_loss": 0.39961302280426025, "step": 1310 }, { "epoch": 0.1335865699608673, "loss_breakdown/lm_loss": 2.8856933568022214e-05, "loss_breakdown/pointer_loss": 0.20356318354606628, "step": 1310 }, { "epoch": 0.1335865699608673, "loss_breakdown/lm_loss": 4.748065475723706e-05, "loss_breakdown/pointer_loss": 0.3391081690788269, "step": 1310 }, { "epoch": 0.1335865699608673, "loss_breakdown/lm_loss": 6.506563659058884e-05, "loss_breakdown/pointer_loss": 3.2709975242614746, "step": 1310 }, { "epoch": 0.1335865699608673, "loss_breakdown/lm_loss": 3.471156378509477e-05, "loss_breakdown/pointer_loss": 0.23039105534553528, "step": 1310 }, { "epoch": 0.1335865699608673, "loss_breakdown/lm_loss": 3.948343510273844e-05, "loss_breakdown/pointer_loss": 0.33770185708999634, "step": 1310 }, { "epoch": 0.1335865699608673, "loss_breakdown/lm_loss": 3.326926162117161e-05, "loss_breakdown/pointer_loss": 0.08294163644313812, "step": 1310 }, { "epoch": 0.13460631476972887, "grad_norm": 3.5851008003791636, "learning_rate": 4.8084985835694055e-06, "loss": 0.3475, "step": 1320 }, { "epoch": 0.13460631476972887, "loss_breakdown/lm_loss": 4.1769915696932e-05, "loss_breakdown/pointer_loss": 0.7482264041900635, "step": 1320 }, { "epoch": 0.13460631476972887, "loss_breakdown/lm_loss": 4.6507313527399674e-05, "loss_breakdown/pointer_loss": 0.9744555950164795, "step": 1320 }, { "epoch": 0.13460631476972887, "loss_breakdown/lm_loss": 4.154903217568062e-05, "loss_breakdown/pointer_loss": 0.7465981245040894, "step": 1320 }, { "epoch": 0.13460631476972887, "loss_breakdown/lm_loss": 3.845183891826309e-05, "loss_breakdown/pointer_loss": 0.32096564769744873, "step": 1320 }, { "epoch": 0.13460631476972887, "loss_breakdown/lm_loss": 4.863853973802179e-05, "loss_breakdown/pointer_loss": 0.9943391680717468, "step": 1320 }, { "epoch": 0.13460631476972887, "loss_breakdown/lm_loss": 5.188196519156918e-05, "loss_breakdown/pointer_loss": 0.4389338493347168, "step": 1320 }, { "epoch": 0.13460631476972887, "loss_breakdown/lm_loss": 4.1997664084192365e-05, "loss_breakdown/pointer_loss": 0.274020791053772, "step": 1320 }, { "epoch": 0.13460631476972887, "loss_breakdown/lm_loss": 3.940865644835867e-05, "loss_breakdown/pointer_loss": 0.17084886133670807, "step": 1320 }, { "epoch": 0.13562605957859045, "grad_norm": 43.94117772637985, "learning_rate": 4.802832861189802e-06, "loss": 0.3369, "step": 1330 }, { "epoch": 0.13562605957859045, "loss_breakdown/lm_loss": 3.704354094224982e-05, "loss_breakdown/pointer_loss": 0.15331368148326874, "step": 1330 }, { "epoch": 0.13562605957859045, "loss_breakdown/lm_loss": 4.615371653926559e-05, "loss_breakdown/pointer_loss": 0.9459099769592285, "step": 1330 }, { "epoch": 0.13562605957859045, "loss_breakdown/lm_loss": 3.932060280931182e-05, "loss_breakdown/pointer_loss": 0.20870856940746307, "step": 1330 }, { "epoch": 0.13562605957859045, "loss_breakdown/lm_loss": 4.282455483917147e-05, "loss_breakdown/pointer_loss": 0.09495922923088074, "step": 1330 }, { "epoch": 0.13562605957859045, "loss_breakdown/lm_loss": 3.426670082262717e-05, "loss_breakdown/pointer_loss": 0.3186546564102173, "step": 1330 }, { "epoch": 0.13562605957859045, "loss_breakdown/lm_loss": 3.630495484685525e-05, "loss_breakdown/pointer_loss": 0.4366149604320526, "step": 1330 }, { "epoch": 0.13562605957859045, "loss_breakdown/lm_loss": 3.96734758396633e-05, "loss_breakdown/pointer_loss": 0.6170204877853394, "step": 1330 }, { "epoch": 0.13562605957859045, "loss_breakdown/lm_loss": 3.423478847253136e-05, "loss_breakdown/pointer_loss": 0.3265962600708008, "step": 1330 }, { "epoch": 0.13664580438745205, "grad_norm": 4.030110391760077, "learning_rate": 4.7971671388101985e-06, "loss": 0.3386, "step": 1340 }, { "epoch": 0.13664580438745205, "loss_breakdown/lm_loss": 3.3284162782365456e-05, "loss_breakdown/pointer_loss": 1.0963869094848633, "step": 1340 }, { "epoch": 0.13664580438745205, "loss_breakdown/lm_loss": 3.0108243663562462e-05, "loss_breakdown/pointer_loss": 0.3727388381958008, "step": 1340 }, { "epoch": 0.13664580438745205, "loss_breakdown/lm_loss": 3.02552871289663e-05, "loss_breakdown/pointer_loss": 0.35936710238456726, "step": 1340 }, { "epoch": 0.13664580438745205, "loss_breakdown/lm_loss": 4.04200327466242e-05, "loss_breakdown/pointer_loss": 1.0825504064559937, "step": 1340 }, { "epoch": 0.13664580438745205, "loss_breakdown/lm_loss": 3.409176133573055e-05, "loss_breakdown/pointer_loss": 0.3587902784347534, "step": 1340 }, { "epoch": 0.13664580438745205, "loss_breakdown/lm_loss": 4.944253305438906e-05, "loss_breakdown/pointer_loss": 0.6531224250793457, "step": 1340 }, { "epoch": 0.13664580438745205, "loss_breakdown/lm_loss": 3.1978637707652524e-05, "loss_breakdown/pointer_loss": 0.16652804613113403, "step": 1340 }, { "epoch": 0.13664580438745205, "loss_breakdown/lm_loss": 4.945920954924077e-05, "loss_breakdown/pointer_loss": 1.003543734550476, "step": 1340 }, { "epoch": 0.13766554919631363, "grad_norm": 10.222798607534713, "learning_rate": 4.791501416430595e-06, "loss": 0.3455, "step": 1350 }, { "epoch": 0.13766554919631363, "loss_breakdown/lm_loss": 0.00014143057342153043, "loss_breakdown/pointer_loss": 2.4512031078338623, "step": 1350 }, { "epoch": 0.13766554919631363, "loss_breakdown/lm_loss": 4.793325570062734e-05, "loss_breakdown/pointer_loss": 0.799885094165802, "step": 1350 }, { "epoch": 0.13766554919631363, "loss_breakdown/lm_loss": 6.6348809923511e-05, "loss_breakdown/pointer_loss": 0.6783676147460938, "step": 1350 }, { "epoch": 0.13766554919631363, "loss_breakdown/lm_loss": 6.629156996496022e-05, "loss_breakdown/pointer_loss": 0.7481800317764282, "step": 1350 }, { "epoch": 0.13766554919631363, "loss_breakdown/lm_loss": 0.0001766413333825767, "loss_breakdown/pointer_loss": 1.0110325813293457, "step": 1350 }, { "epoch": 0.13766554919631363, "loss_breakdown/lm_loss": 7.319805445149541e-05, "loss_breakdown/pointer_loss": 0.5061613321304321, "step": 1350 }, { "epoch": 0.13766554919631363, "loss_breakdown/lm_loss": 4.0413371607428417e-05, "loss_breakdown/pointer_loss": 0.8335423469543457, "step": 1350 }, { "epoch": 0.13766554919631363, "loss_breakdown/lm_loss": 4.441829150891863e-05, "loss_breakdown/pointer_loss": 2.530651092529297, "step": 1350 }, { "epoch": 0.1386852940051752, "grad_norm": 3.1203488232539884, "learning_rate": 4.785835694050992e-06, "loss": 0.3367, "step": 1360 }, { "epoch": 0.1386852940051752, "loss_breakdown/lm_loss": 5.371743463911116e-05, "loss_breakdown/pointer_loss": 0.2113463580608368, "step": 1360 }, { "epoch": 0.1386852940051752, "loss_breakdown/lm_loss": 3.796769669861533e-05, "loss_breakdown/pointer_loss": 0.08794917166233063, "step": 1360 }, { "epoch": 0.1386852940051752, "loss_breakdown/lm_loss": 4.111673842999153e-05, "loss_breakdown/pointer_loss": 0.19452182948589325, "step": 1360 }, { "epoch": 0.1386852940051752, "loss_breakdown/lm_loss": 4.5602784666698426e-05, "loss_breakdown/pointer_loss": 0.42986592650413513, "step": 1360 }, { "epoch": 0.1386852940051752, "loss_breakdown/lm_loss": 5.852560207131319e-05, "loss_breakdown/pointer_loss": 0.6295226812362671, "step": 1360 }, { "epoch": 0.1386852940051752, "loss_breakdown/lm_loss": 3.556955198291689e-05, "loss_breakdown/pointer_loss": 0.2084278017282486, "step": 1360 }, { "epoch": 0.1386852940051752, "loss_breakdown/lm_loss": 6.300306995399296e-05, "loss_breakdown/pointer_loss": 0.2291930913925171, "step": 1360 }, { "epoch": 0.1386852940051752, "loss_breakdown/lm_loss": 9.031248191604391e-05, "loss_breakdown/pointer_loss": 0.8162392973899841, "step": 1360 }, { "epoch": 0.13970503881403679, "grad_norm": 2.1796954918893054, "learning_rate": 4.780169971671389e-06, "loss": 0.3424, "step": 1370 }, { "epoch": 0.13970503881403679, "loss_breakdown/lm_loss": 5.4538857511943206e-05, "loss_breakdown/pointer_loss": 0.20110294222831726, "step": 1370 }, { "epoch": 0.13970503881403679, "loss_breakdown/lm_loss": 4.284357783035375e-05, "loss_breakdown/pointer_loss": 0.5713751912117004, "step": 1370 }, { "epoch": 0.13970503881403679, "loss_breakdown/lm_loss": 3.7568926927633584e-05, "loss_breakdown/pointer_loss": 0.7676190137863159, "step": 1370 }, { "epoch": 0.13970503881403679, "loss_breakdown/lm_loss": 3.401881622266956e-05, "loss_breakdown/pointer_loss": 0.5340876579284668, "step": 1370 }, { "epoch": 0.13970503881403679, "loss_breakdown/lm_loss": 4.299193460610695e-05, "loss_breakdown/pointer_loss": 1.1018041372299194, "step": 1370 }, { "epoch": 0.13970503881403679, "loss_breakdown/lm_loss": 6.0352645959937945e-05, "loss_breakdown/pointer_loss": 0.4559866786003113, "step": 1370 }, { "epoch": 0.13970503881403679, "loss_breakdown/lm_loss": 3.7342801078921184e-05, "loss_breakdown/pointer_loss": 0.25558823347091675, "step": 1370 }, { "epoch": 0.13970503881403679, "loss_breakdown/lm_loss": 6.174767622724175e-05, "loss_breakdown/pointer_loss": 0.5892958641052246, "step": 1370 }, { "epoch": 0.14072478362289836, "grad_norm": 9.162944583249104, "learning_rate": 4.774504249291785e-06, "loss": 0.3033, "step": 1380 }, { "epoch": 0.14072478362289836, "loss_breakdown/lm_loss": 3.459263825789094e-05, "loss_breakdown/pointer_loss": 2.666344165802002, "step": 1380 }, { "epoch": 0.14072478362289836, "loss_breakdown/lm_loss": 3.1604678952135146e-05, "loss_breakdown/pointer_loss": 0.6221032738685608, "step": 1380 }, { "epoch": 0.14072478362289836, "loss_breakdown/lm_loss": 3.773502248805016e-05, "loss_breakdown/pointer_loss": 0.27240729331970215, "step": 1380 }, { "epoch": 0.14072478362289836, "loss_breakdown/lm_loss": 5.711097037419677e-05, "loss_breakdown/pointer_loss": 2.317582368850708, "step": 1380 }, { "epoch": 0.14072478362289836, "loss_breakdown/lm_loss": 4.413212445797399e-05, "loss_breakdown/pointer_loss": 0.24231407046318054, "step": 1380 }, { "epoch": 0.14072478362289836, "loss_breakdown/lm_loss": 4.476711910683662e-05, "loss_breakdown/pointer_loss": 0.3361906409263611, "step": 1380 }, { "epoch": 0.14072478362289836, "loss_breakdown/lm_loss": 4.0818529669195414e-05, "loss_breakdown/pointer_loss": 0.5098922252655029, "step": 1380 }, { "epoch": 0.14072478362289836, "loss_breakdown/lm_loss": 3.155700323986821e-05, "loss_breakdown/pointer_loss": 0.15446262061595917, "step": 1380 }, { "epoch": 0.14174452843175994, "grad_norm": 3.2510261156740143, "learning_rate": 4.768838526912182e-06, "loss": 0.3677, "step": 1390 }, { "epoch": 0.14174452843175994, "loss_breakdown/lm_loss": 3.6177021684125066e-05, "loss_breakdown/pointer_loss": 1.1606595516204834, "step": 1390 }, { "epoch": 0.14174452843175994, "loss_breakdown/lm_loss": 4.444325531949289e-05, "loss_breakdown/pointer_loss": 0.9343068599700928, "step": 1390 }, { "epoch": 0.14174452843175994, "loss_breakdown/lm_loss": 4.321932647144422e-05, "loss_breakdown/pointer_loss": 0.5851532220840454, "step": 1390 }, { "epoch": 0.14174452843175994, "loss_breakdown/lm_loss": 3.390325582586229e-05, "loss_breakdown/pointer_loss": 0.21442417800426483, "step": 1390 }, { "epoch": 0.14174452843175994, "loss_breakdown/lm_loss": 3.602512879297137e-05, "loss_breakdown/pointer_loss": 0.41564857959747314, "step": 1390 }, { "epoch": 0.14174452843175994, "loss_breakdown/lm_loss": 3.448503412073478e-05, "loss_breakdown/pointer_loss": 0.1812812238931656, "step": 1390 }, { "epoch": 0.14174452843175994, "loss_breakdown/lm_loss": 3.72345675714314e-05, "loss_breakdown/pointer_loss": 0.46232157945632935, "step": 1390 }, { "epoch": 0.14174452843175994, "loss_breakdown/lm_loss": 4.4787309889215976e-05, "loss_breakdown/pointer_loss": 0.6534491181373596, "step": 1390 }, { "epoch": 0.14276427324062155, "grad_norm": 7.952142132436169, "learning_rate": 4.763172804532578e-06, "loss": 0.3334, "step": 1400 }, { "epoch": 0.14276427324062155, "loss_breakdown/lm_loss": 8.881254325388e-05, "loss_breakdown/pointer_loss": 1.415709137916565, "step": 1400 }, { "epoch": 0.14276427324062155, "loss_breakdown/lm_loss": 5.486259760800749e-05, "loss_breakdown/pointer_loss": 0.9140927195549011, "step": 1400 }, { "epoch": 0.14276427324062155, "loss_breakdown/lm_loss": 5.516514283954166e-05, "loss_breakdown/pointer_loss": 0.6007524728775024, "step": 1400 }, { "epoch": 0.14276427324062155, "loss_breakdown/lm_loss": 0.00011406158591853455, "loss_breakdown/pointer_loss": 1.4236388206481934, "step": 1400 }, { "epoch": 0.14276427324062155, "loss_breakdown/lm_loss": 4.4368785893311724e-05, "loss_breakdown/pointer_loss": 1.243863821029663, "step": 1400 }, { "epoch": 0.14276427324062155, "loss_breakdown/lm_loss": 4.021511267637834e-05, "loss_breakdown/pointer_loss": 0.9779177904129028, "step": 1400 }, { "epoch": 0.14276427324062155, "loss_breakdown/lm_loss": 0.0008525368757545948, "loss_breakdown/pointer_loss": 0.8504444360733032, "step": 1400 }, { "epoch": 0.14276427324062155, "loss_breakdown/lm_loss": 4.5354368921834975e-05, "loss_breakdown/pointer_loss": 0.6210739612579346, "step": 1400 }, { "epoch": 0.14378401804948313, "grad_norm": 4.519300820408836, "learning_rate": 4.757507082152975e-06, "loss": 0.3306, "step": 1410 }, { "epoch": 0.14378401804948313, "loss_breakdown/lm_loss": 4.875754166278057e-05, "loss_breakdown/pointer_loss": 0.22071897983551025, "step": 1410 }, { "epoch": 0.14378401804948313, "loss_breakdown/lm_loss": 4.019715925096534e-05, "loss_breakdown/pointer_loss": 0.22831986844539642, "step": 1410 }, { "epoch": 0.14378401804948313, "loss_breakdown/lm_loss": 3.901011587004177e-05, "loss_breakdown/pointer_loss": 0.11954247206449509, "step": 1410 }, { "epoch": 0.14378401804948313, "loss_breakdown/lm_loss": 4.48447754024528e-05, "loss_breakdown/pointer_loss": 0.11815392225980759, "step": 1410 }, { "epoch": 0.14378401804948313, "loss_breakdown/lm_loss": 4.265234383638017e-05, "loss_breakdown/pointer_loss": 1.3195929527282715, "step": 1410 }, { "epoch": 0.14378401804948313, "loss_breakdown/lm_loss": 6.0945647419430315e-05, "loss_breakdown/pointer_loss": 0.2741573452949524, "step": 1410 }, { "epoch": 0.14378401804948313, "loss_breakdown/lm_loss": 4.514426473178901e-05, "loss_breakdown/pointer_loss": 1.535563588142395, "step": 1410 }, { "epoch": 0.14378401804948313, "loss_breakdown/lm_loss": 4.6813194785499945e-05, "loss_breakdown/pointer_loss": 2.13792085647583, "step": 1410 }, { "epoch": 0.1448037628583447, "grad_norm": 7.518552956826327, "learning_rate": 4.751841359773371e-06, "loss": 0.331, "step": 1420 }, { "epoch": 0.1448037628583447, "loss_breakdown/lm_loss": 6.602302892133594e-05, "loss_breakdown/pointer_loss": 0.46945491433143616, "step": 1420 }, { "epoch": 0.1448037628583447, "loss_breakdown/lm_loss": 5.3785730415256694e-05, "loss_breakdown/pointer_loss": 0.5741807222366333, "step": 1420 }, { "epoch": 0.1448037628583447, "loss_breakdown/lm_loss": 6.137047603260726e-05, "loss_breakdown/pointer_loss": 0.5351006984710693, "step": 1420 }, { "epoch": 0.1448037628583447, "loss_breakdown/lm_loss": 5.060084367869422e-05, "loss_breakdown/pointer_loss": 0.640276312828064, "step": 1420 }, { "epoch": 0.1448037628583447, "loss_breakdown/lm_loss": 6.878611748106778e-05, "loss_breakdown/pointer_loss": 0.11790986359119415, "step": 1420 }, { "epoch": 0.1448037628583447, "loss_breakdown/lm_loss": 4.939704012940638e-05, "loss_breakdown/pointer_loss": 0.31635814905166626, "step": 1420 }, { "epoch": 0.1448037628583447, "loss_breakdown/lm_loss": 4.11191358580254e-05, "loss_breakdown/pointer_loss": 0.24197623133659363, "step": 1420 }, { "epoch": 0.1448037628583447, "loss_breakdown/lm_loss": 4.904588422505185e-05, "loss_breakdown/pointer_loss": 0.23533271253108978, "step": 1420 }, { "epoch": 0.14582350766720628, "grad_norm": 7.002465632172983, "learning_rate": 4.746175637393768e-06, "loss": 0.3012, "step": 1430 }, { "epoch": 0.14582350766720628, "loss_breakdown/lm_loss": 3.347990059410222e-05, "loss_breakdown/pointer_loss": 0.08979009836912155, "step": 1430 }, { "epoch": 0.14582350766720628, "loss_breakdown/lm_loss": 6.391974602593109e-05, "loss_breakdown/pointer_loss": 0.44236284494400024, "step": 1430 }, { "epoch": 0.14582350766720628, "loss_breakdown/lm_loss": 5.5803870054660365e-05, "loss_breakdown/pointer_loss": 0.18412423133850098, "step": 1430 }, { "epoch": 0.14582350766720628, "loss_breakdown/lm_loss": 0.00042753235902637243, "loss_breakdown/pointer_loss": 0.30091550946235657, "step": 1430 }, { "epoch": 0.14582350766720628, "loss_breakdown/lm_loss": 4.9431357183493674e-05, "loss_breakdown/pointer_loss": 0.25194650888442993, "step": 1430 }, { "epoch": 0.14582350766720628, "loss_breakdown/lm_loss": 4.1755916754482314e-05, "loss_breakdown/pointer_loss": 0.20135264098644257, "step": 1430 }, { "epoch": 0.14582350766720628, "loss_breakdown/lm_loss": 4.479468771023676e-05, "loss_breakdown/pointer_loss": 5.351265907287598, "step": 1430 }, { "epoch": 0.14582350766720628, "loss_breakdown/lm_loss": 4.7898440243443474e-05, "loss_breakdown/pointer_loss": 0.30849069356918335, "step": 1430 }, { "epoch": 0.14684325247606786, "grad_norm": 5.674696693910236, "learning_rate": 4.740509915014165e-06, "loss": 0.3384, "step": 1440 }, { "epoch": 0.14684325247606786, "loss_breakdown/lm_loss": 3.1601775845047086e-05, "loss_breakdown/pointer_loss": 0.39447903633117676, "step": 1440 }, { "epoch": 0.14684325247606786, "loss_breakdown/lm_loss": 3.824483428616077e-05, "loss_breakdown/pointer_loss": 0.5305882096290588, "step": 1440 }, { "epoch": 0.14684325247606786, "loss_breakdown/lm_loss": 3.069769445573911e-05, "loss_breakdown/pointer_loss": 0.4528299570083618, "step": 1440 }, { "epoch": 0.14684325247606786, "loss_breakdown/lm_loss": 3.3901658753165975e-05, "loss_breakdown/pointer_loss": 0.24610106647014618, "step": 1440 }, { "epoch": 0.14684325247606786, "loss_breakdown/lm_loss": 5.746957685914822e-05, "loss_breakdown/pointer_loss": 0.6586940884590149, "step": 1440 }, { "epoch": 0.14684325247606786, "loss_breakdown/lm_loss": 3.1099742045626044e-05, "loss_breakdown/pointer_loss": 0.37279924750328064, "step": 1440 }, { "epoch": 0.14684325247606786, "loss_breakdown/lm_loss": 2.7482510631671175e-05, "loss_breakdown/pointer_loss": 0.2154909074306488, "step": 1440 }, { "epoch": 0.14684325247606786, "loss_breakdown/lm_loss": 3.949479287257418e-05, "loss_breakdown/pointer_loss": 0.22264179587364197, "step": 1440 }, { "epoch": 0.14786299728492944, "grad_norm": 5.438260551856639, "learning_rate": 4.734844192634561e-06, "loss": 0.3254, "step": 1450 }, { "epoch": 0.14786299728492944, "loss_breakdown/lm_loss": 0.00013749321806244552, "loss_breakdown/pointer_loss": 2.249758005142212, "step": 1450 }, { "epoch": 0.14786299728492944, "loss_breakdown/lm_loss": 6.57577402307652e-05, "loss_breakdown/pointer_loss": 1.063777208328247, "step": 1450 }, { "epoch": 0.14786299728492944, "loss_breakdown/lm_loss": 5.902059638174251e-05, "loss_breakdown/pointer_loss": 0.9055507183074951, "step": 1450 }, { "epoch": 0.14786299728492944, "loss_breakdown/lm_loss": 4.600988904712722e-05, "loss_breakdown/pointer_loss": 0.8397104740142822, "step": 1450 }, { "epoch": 0.14786299728492944, "loss_breakdown/lm_loss": 4.1160423279507086e-05, "loss_breakdown/pointer_loss": 1.1491167545318604, "step": 1450 }, { "epoch": 0.14786299728492944, "loss_breakdown/lm_loss": 4.0627852285979316e-05, "loss_breakdown/pointer_loss": 0.9476417899131775, "step": 1450 }, { "epoch": 0.14786299728492944, "loss_breakdown/lm_loss": 0.00011313093273201957, "loss_breakdown/pointer_loss": 1.078465223312378, "step": 1450 }, { "epoch": 0.14786299728492944, "loss_breakdown/lm_loss": 6.660848157480359e-05, "loss_breakdown/pointer_loss": 1.1731910705566406, "step": 1450 }, { "epoch": 0.14888274209379102, "grad_norm": 5.103089279599528, "learning_rate": 4.729178470254958e-06, "loss": 0.3271, "step": 1460 }, { "epoch": 0.14888274209379102, "loss_breakdown/lm_loss": 2.55096656474052e-05, "loss_breakdown/pointer_loss": 0.24952088296413422, "step": 1460 }, { "epoch": 0.14888274209379102, "loss_breakdown/lm_loss": 3.060843300772831e-05, "loss_breakdown/pointer_loss": 0.22855620086193085, "step": 1460 }, { "epoch": 0.14888274209379102, "loss_breakdown/lm_loss": 2.694607428566087e-05, "loss_breakdown/pointer_loss": 0.14519697427749634, "step": 1460 }, { "epoch": 0.14888274209379102, "loss_breakdown/lm_loss": 2.859509550035e-05, "loss_breakdown/pointer_loss": 0.3840150237083435, "step": 1460 }, { "epoch": 0.14888274209379102, "loss_breakdown/lm_loss": 2.836854400811717e-05, "loss_breakdown/pointer_loss": 0.12912486493587494, "step": 1460 }, { "epoch": 0.14888274209379102, "loss_breakdown/lm_loss": 3.259218647144735e-05, "loss_breakdown/pointer_loss": 0.37691932916641235, "step": 1460 }, { "epoch": 0.14888274209379102, "loss_breakdown/lm_loss": 4.879142943536863e-05, "loss_breakdown/pointer_loss": 0.7547030448913574, "step": 1460 }, { "epoch": 0.14888274209379102, "loss_breakdown/lm_loss": 4.366282155388035e-05, "loss_breakdown/pointer_loss": 1.0286821126937866, "step": 1460 }, { "epoch": 0.14990248690265262, "grad_norm": 3.6333052835643724, "learning_rate": 4.723512747875355e-06, "loss": 0.3248, "step": 1470 }, { "epoch": 0.14990248690265262, "loss_breakdown/lm_loss": 3.146116068819538e-05, "loss_breakdown/pointer_loss": 0.256816565990448, "step": 1470 }, { "epoch": 0.14990248690265262, "loss_breakdown/lm_loss": 3.521993858157657e-05, "loss_breakdown/pointer_loss": 0.5275102853775024, "step": 1470 }, { "epoch": 0.14990248690265262, "loss_breakdown/lm_loss": 2.8301539714448154e-05, "loss_breakdown/pointer_loss": 0.41239094734191895, "step": 1470 }, { "epoch": 0.14990248690265262, "loss_breakdown/lm_loss": 3.4314642107347026e-05, "loss_breakdown/pointer_loss": 0.6326932311058044, "step": 1470 }, { "epoch": 0.14990248690265262, "loss_breakdown/lm_loss": 2.955500895041041e-05, "loss_breakdown/pointer_loss": 0.5047004818916321, "step": 1470 }, { "epoch": 0.14990248690265262, "loss_breakdown/lm_loss": 4.52530657639727e-05, "loss_breakdown/pointer_loss": 0.3760266602039337, "step": 1470 }, { "epoch": 0.14990248690265262, "loss_breakdown/lm_loss": 3.2711413950892165e-05, "loss_breakdown/pointer_loss": 0.9052809476852417, "step": 1470 }, { "epoch": 0.14990248690265262, "loss_breakdown/lm_loss": 2.920235419878736e-05, "loss_breakdown/pointer_loss": 0.6993669271469116, "step": 1470 }, { "epoch": 0.1509222317115142, "grad_norm": 6.018179177746228, "learning_rate": 4.717847025495751e-06, "loss": 0.3155, "step": 1480 }, { "epoch": 0.1509222317115142, "loss_breakdown/lm_loss": 3.200174614903517e-05, "loss_breakdown/pointer_loss": 0.2609238028526306, "step": 1480 }, { "epoch": 0.1509222317115142, "loss_breakdown/lm_loss": 2.7190362743567675e-05, "loss_breakdown/pointer_loss": 1.7208225727081299, "step": 1480 }, { "epoch": 0.1509222317115142, "loss_breakdown/lm_loss": 2.62446774286218e-05, "loss_breakdown/pointer_loss": 0.6010308265686035, "step": 1480 }, { "epoch": 0.1509222317115142, "loss_breakdown/lm_loss": 3.0794097256148234e-05, "loss_breakdown/pointer_loss": 0.7058805823326111, "step": 1480 }, { "epoch": 0.1509222317115142, "loss_breakdown/lm_loss": 2.3491227693739347e-05, "loss_breakdown/pointer_loss": 1.8730437755584717, "step": 1480 }, { "epoch": 0.1509222317115142, "loss_breakdown/lm_loss": 3.723844565683976e-05, "loss_breakdown/pointer_loss": 0.2654169201850891, "step": 1480 }, { "epoch": 0.1509222317115142, "loss_breakdown/lm_loss": 3.0460480047622696e-05, "loss_breakdown/pointer_loss": 0.13308435678482056, "step": 1480 }, { "epoch": 0.1509222317115142, "loss_breakdown/lm_loss": 2.6959871320286766e-05, "loss_breakdown/pointer_loss": 0.24738860130310059, "step": 1480 }, { "epoch": 0.15194197652037578, "grad_norm": 3.298447583439139, "learning_rate": 4.712181303116148e-06, "loss": 0.3358, "step": 1490 }, { "epoch": 0.15194197652037578, "loss_breakdown/lm_loss": 3.191159339621663e-05, "loss_breakdown/pointer_loss": 0.29312214255332947, "step": 1490 }, { "epoch": 0.15194197652037578, "loss_breakdown/lm_loss": 2.4975193809950724e-05, "loss_breakdown/pointer_loss": 0.4365219473838806, "step": 1490 }, { "epoch": 0.15194197652037578, "loss_breakdown/lm_loss": 3.076219581998885e-05, "loss_breakdown/pointer_loss": 0.3037183880805969, "step": 1490 }, { "epoch": 0.15194197652037578, "loss_breakdown/lm_loss": 3.243417449994013e-05, "loss_breakdown/pointer_loss": 0.2574841380119324, "step": 1490 }, { "epoch": 0.15194197652037578, "loss_breakdown/lm_loss": 2.4864733859431e-05, "loss_breakdown/pointer_loss": 0.4278838634490967, "step": 1490 }, { "epoch": 0.15194197652037578, "loss_breakdown/lm_loss": 2.7167056032340042e-05, "loss_breakdown/pointer_loss": 0.3103892505168915, "step": 1490 }, { "epoch": 0.15194197652037578, "loss_breakdown/lm_loss": 2.6742280169855803e-05, "loss_breakdown/pointer_loss": 0.19660881161689758, "step": 1490 }, { "epoch": 0.15194197652037578, "loss_breakdown/lm_loss": 2.3922928448882885e-05, "loss_breakdown/pointer_loss": 0.18045943975448608, "step": 1490 }, { "epoch": 0.15296172132923735, "grad_norm": 6.013442177057856, "learning_rate": 4.706515580736544e-06, "loss": 0.3256, "step": 1500 }, { "epoch": 0.15296172132923735, "loss_breakdown/lm_loss": 8.063134737312794e-05, "loss_breakdown/pointer_loss": 1.6500935554504395, "step": 1500 }, { "epoch": 0.15296172132923735, "loss_breakdown/lm_loss": 6.025851325830445e-05, "loss_breakdown/pointer_loss": 1.3334909677505493, "step": 1500 }, { "epoch": 0.15296172132923735, "loss_breakdown/lm_loss": 0.00011165336763951927, "loss_breakdown/pointer_loss": 0.6839771866798401, "step": 1500 }, { "epoch": 0.15296172132923735, "loss_breakdown/lm_loss": 4.328371142037213e-05, "loss_breakdown/pointer_loss": 1.1948379278182983, "step": 1500 }, { "epoch": 0.15296172132923735, "loss_breakdown/lm_loss": 0.00010701993596740067, "loss_breakdown/pointer_loss": 1.3200173377990723, "step": 1500 }, { "epoch": 0.15296172132923735, "loss_breakdown/lm_loss": 4.232237915857695e-05, "loss_breakdown/pointer_loss": 0.5567977428436279, "step": 1500 }, { "epoch": 0.15296172132923735, "loss_breakdown/lm_loss": 5.97852558712475e-05, "loss_breakdown/pointer_loss": 0.3825640082359314, "step": 1500 }, { "epoch": 0.15296172132923735, "loss_breakdown/lm_loss": 3.252512760809623e-05, "loss_breakdown/pointer_loss": 0.5486617088317871, "step": 1500 }, { "epoch": 0.15398146613809893, "grad_norm": 6.492759729395175, "learning_rate": 4.700849858356941e-06, "loss": 0.3276, "step": 1510 }, { "epoch": 0.15398146613809893, "loss_breakdown/lm_loss": 2.8582315280800685e-05, "loss_breakdown/pointer_loss": 0.18296411633491516, "step": 1510 }, { "epoch": 0.15398146613809893, "loss_breakdown/lm_loss": 2.8549746275530197e-05, "loss_breakdown/pointer_loss": 0.675450325012207, "step": 1510 }, { "epoch": 0.15398146613809893, "loss_breakdown/lm_loss": 3.537635348038748e-05, "loss_breakdown/pointer_loss": 0.11013153940439224, "step": 1510 }, { "epoch": 0.15398146613809893, "loss_breakdown/lm_loss": 3.2083262340165675e-05, "loss_breakdown/pointer_loss": 0.3278355598449707, "step": 1510 }, { "epoch": 0.15398146613809893, "loss_breakdown/lm_loss": 9.69889952102676e-05, "loss_breakdown/pointer_loss": 0.23897366225719452, "step": 1510 }, { "epoch": 0.15398146613809893, "loss_breakdown/lm_loss": 2.9983430067659356e-05, "loss_breakdown/pointer_loss": 0.2294117659330368, "step": 1510 }, { "epoch": 0.15398146613809893, "loss_breakdown/lm_loss": 3.089122401433997e-05, "loss_breakdown/pointer_loss": 2.951948404312134, "step": 1510 }, { "epoch": 0.15398146613809893, "loss_breakdown/lm_loss": 2.943521030829288e-05, "loss_breakdown/pointer_loss": 0.5210321545600891, "step": 1510 }, { "epoch": 0.1550012109469605, "grad_norm": 2.7219848356412513, "learning_rate": 4.695184135977337e-06, "loss": 0.3474, "step": 1520 }, { "epoch": 0.1550012109469605, "loss_breakdown/lm_loss": 6.636673788307235e-05, "loss_breakdown/pointer_loss": 0.47261226177215576, "step": 1520 }, { "epoch": 0.1550012109469605, "loss_breakdown/lm_loss": 4.852227357332595e-05, "loss_breakdown/pointer_loss": 0.4575998783111572, "step": 1520 }, { "epoch": 0.1550012109469605, "loss_breakdown/lm_loss": 5.566718027694151e-05, "loss_breakdown/pointer_loss": 0.3623393774032593, "step": 1520 }, { "epoch": 0.1550012109469605, "loss_breakdown/lm_loss": 3.506615030346438e-05, "loss_breakdown/pointer_loss": 0.5388891696929932, "step": 1520 }, { "epoch": 0.1550012109469605, "loss_breakdown/lm_loss": 4.0820556023390964e-05, "loss_breakdown/pointer_loss": 0.49924150109291077, "step": 1520 }, { "epoch": 0.1550012109469605, "loss_breakdown/lm_loss": 3.346738594700582e-05, "loss_breakdown/pointer_loss": 0.3381573557853699, "step": 1520 }, { "epoch": 0.1550012109469605, "loss_breakdown/lm_loss": 5.128922202857211e-05, "loss_breakdown/pointer_loss": 0.4666184186935425, "step": 1520 }, { "epoch": 0.1550012109469605, "loss_breakdown/lm_loss": 5.726033487007953e-05, "loss_breakdown/pointer_loss": 0.44254636764526367, "step": 1520 }, { "epoch": 0.15602095575582212, "grad_norm": 13.407198929480845, "learning_rate": 4.689518413597734e-06, "loss": 0.3142, "step": 1530 }, { "epoch": 0.15602095575582212, "loss_breakdown/lm_loss": 4.6563778596464545e-05, "loss_breakdown/pointer_loss": 0.4534907937049866, "step": 1530 }, { "epoch": 0.15602095575582212, "loss_breakdown/lm_loss": 3.4111384593416005e-05, "loss_breakdown/pointer_loss": 0.1296231746673584, "step": 1530 }, { "epoch": 0.15602095575582212, "loss_breakdown/lm_loss": 3.2768894016044214e-05, "loss_breakdown/pointer_loss": 0.16284671425819397, "step": 1530 }, { "epoch": 0.15602095575582212, "loss_breakdown/lm_loss": 4.814759086002596e-05, "loss_breakdown/pointer_loss": 0.22308389842510223, "step": 1530 }, { "epoch": 0.15602095575582212, "loss_breakdown/lm_loss": 2.6932217224384658e-05, "loss_breakdown/pointer_loss": 0.16093139350414276, "step": 1530 }, { "epoch": 0.15602095575582212, "loss_breakdown/lm_loss": 3.3090749639086425e-05, "loss_breakdown/pointer_loss": 0.2489214390516281, "step": 1530 }, { "epoch": 0.15602095575582212, "loss_breakdown/lm_loss": 4.053225347888656e-05, "loss_breakdown/pointer_loss": 0.24115726351737976, "step": 1530 }, { "epoch": 0.15602095575582212, "loss_breakdown/lm_loss": 3.819587072939612e-05, "loss_breakdown/pointer_loss": 2.4752538204193115, "step": 1530 }, { "epoch": 0.1570407005646837, "grad_norm": 3.852331564718616, "learning_rate": 4.683852691218131e-06, "loss": 0.3257, "step": 1540 }, { "epoch": 0.1570407005646837, "loss_breakdown/lm_loss": 2.959818630188238e-05, "loss_breakdown/pointer_loss": 0.2190735638141632, "step": 1540 }, { "epoch": 0.1570407005646837, "loss_breakdown/lm_loss": 3.2619675039313734e-05, "loss_breakdown/pointer_loss": 0.6076250076293945, "step": 1540 }, { "epoch": 0.1570407005646837, "loss_breakdown/lm_loss": 2.5693989300634712e-05, "loss_breakdown/pointer_loss": 2.037067413330078, "step": 1540 }, { "epoch": 0.1570407005646837, "loss_breakdown/lm_loss": 3.0738479836145416e-05, "loss_breakdown/pointer_loss": 1.5479309558868408, "step": 1540 }, { "epoch": 0.1570407005646837, "loss_breakdown/lm_loss": 3.3564992918400094e-05, "loss_breakdown/pointer_loss": 0.1563151478767395, "step": 1540 }, { "epoch": 0.1570407005646837, "loss_breakdown/lm_loss": 2.8913431378896348e-05, "loss_breakdown/pointer_loss": 1.1427425146102905, "step": 1540 }, { "epoch": 0.1570407005646837, "loss_breakdown/lm_loss": 2.737981230893638e-05, "loss_breakdown/pointer_loss": 0.36369577050209045, "step": 1540 }, { "epoch": 0.1570407005646837, "loss_breakdown/lm_loss": 2.9251832529553212e-05, "loss_breakdown/pointer_loss": 0.27927514910697937, "step": 1540 }, { "epoch": 0.15806044537354527, "grad_norm": 13.138053448762921, "learning_rate": 4.6781869688385276e-06, "loss": 0.3065, "step": 1550 }, { "epoch": 0.15806044537354527, "loss_breakdown/lm_loss": 9.240930376108736e-05, "loss_breakdown/pointer_loss": 2.4186081886291504, "step": 1550 }, { "epoch": 0.15806044537354527, "loss_breakdown/lm_loss": 4.635403456632048e-05, "loss_breakdown/pointer_loss": 0.8836372494697571, "step": 1550 }, { "epoch": 0.15806044537354527, "loss_breakdown/lm_loss": 3.39096222887747e-05, "loss_breakdown/pointer_loss": 0.42683708667755127, "step": 1550 }, { "epoch": 0.15806044537354527, "loss_breakdown/lm_loss": 4.469648774829693e-05, "loss_breakdown/pointer_loss": 0.6034119129180908, "step": 1550 }, { "epoch": 0.15806044537354527, "loss_breakdown/lm_loss": 5.009294181945734e-05, "loss_breakdown/pointer_loss": 1.4609150886535645, "step": 1550 }, { "epoch": 0.15806044537354527, "loss_breakdown/lm_loss": 3.381340502528474e-05, "loss_breakdown/pointer_loss": 0.5978885889053345, "step": 1550 }, { "epoch": 0.15806044537354527, "loss_breakdown/lm_loss": 3.361853305250406e-05, "loss_breakdown/pointer_loss": 1.1701042652130127, "step": 1550 }, { "epoch": 0.15806044537354527, "loss_breakdown/lm_loss": 3.4586515539558604e-05, "loss_breakdown/pointer_loss": 0.3710658550262451, "step": 1550 }, { "epoch": 0.15908019018240685, "grad_norm": 6.628452434636642, "learning_rate": 4.672521246458924e-06, "loss": 0.3191, "step": 1560 }, { "epoch": 0.15908019018240685, "loss_breakdown/lm_loss": 2.2431295292335562e-05, "loss_breakdown/pointer_loss": 0.15962225198745728, "step": 1560 }, { "epoch": 0.15908019018240685, "loss_breakdown/lm_loss": 2.9464164981618524e-05, "loss_breakdown/pointer_loss": 0.3444957733154297, "step": 1560 }, { "epoch": 0.15908019018240685, "loss_breakdown/lm_loss": 2.4756556740612723e-05, "loss_breakdown/pointer_loss": 0.30568552017211914, "step": 1560 }, { "epoch": 0.15908019018240685, "loss_breakdown/lm_loss": 4.055996032548137e-05, "loss_breakdown/pointer_loss": 0.2787024974822998, "step": 1560 }, { "epoch": 0.15908019018240685, "loss_breakdown/lm_loss": 3.114377614110708e-05, "loss_breakdown/pointer_loss": 0.10121487826108932, "step": 1560 }, { "epoch": 0.15908019018240685, "loss_breakdown/lm_loss": 2.9713490221183747e-05, "loss_breakdown/pointer_loss": 3.0706536769866943, "step": 1560 }, { "epoch": 0.15908019018240685, "loss_breakdown/lm_loss": 3.0944891477702186e-05, "loss_breakdown/pointer_loss": 0.6679399609565735, "step": 1560 }, { "epoch": 0.15908019018240685, "loss_breakdown/lm_loss": 2.8942700737388805e-05, "loss_breakdown/pointer_loss": 0.17025670409202576, "step": 1560 }, { "epoch": 0.16009993499126843, "grad_norm": 5.082088348385918, "learning_rate": 4.666855524079321e-06, "loss": 0.3367, "step": 1570 }, { "epoch": 0.16009993499126843, "loss_breakdown/lm_loss": 2.8064101570635103e-05, "loss_breakdown/pointer_loss": 0.695889413356781, "step": 1570 }, { "epoch": 0.16009993499126843, "loss_breakdown/lm_loss": 4.588447336573154e-05, "loss_breakdown/pointer_loss": 1.0408109426498413, "step": 1570 }, { "epoch": 0.16009993499126843, "loss_breakdown/lm_loss": 2.804817268042825e-05, "loss_breakdown/pointer_loss": 0.34697169065475464, "step": 1570 }, { "epoch": 0.16009993499126843, "loss_breakdown/lm_loss": 3.805858068517409e-05, "loss_breakdown/pointer_loss": 1.0202025175094604, "step": 1570 }, { "epoch": 0.16009993499126843, "loss_breakdown/lm_loss": 3.1382405722979456e-05, "loss_breakdown/pointer_loss": 0.19768521189689636, "step": 1570 }, { "epoch": 0.16009993499126843, "loss_breakdown/lm_loss": 2.632597716001328e-05, "loss_breakdown/pointer_loss": 0.6273626089096069, "step": 1570 }, { "epoch": 0.16009993499126843, "loss_breakdown/lm_loss": 5.065877121523954e-05, "loss_breakdown/pointer_loss": 0.18194645643234253, "step": 1570 }, { "epoch": 0.16009993499126843, "loss_breakdown/lm_loss": 3.101270704064518e-05, "loss_breakdown/pointer_loss": 0.3421107828617096, "step": 1570 }, { "epoch": 0.16111967980013, "grad_norm": 5.8910630497733765, "learning_rate": 4.661189801699717e-06, "loss": 0.2959, "step": 1580 }, { "epoch": 0.16111967980013, "loss_breakdown/lm_loss": 2.6558800527709536e-05, "loss_breakdown/pointer_loss": 0.44087451696395874, "step": 1580 }, { "epoch": 0.16111967980013, "loss_breakdown/lm_loss": 2.6296440410078503e-05, "loss_breakdown/pointer_loss": 0.24975228309631348, "step": 1580 }, { "epoch": 0.16111967980013, "loss_breakdown/lm_loss": 2.5295074010500684e-05, "loss_breakdown/pointer_loss": 0.2107880413532257, "step": 1580 }, { "epoch": 0.16111967980013, "loss_breakdown/lm_loss": 2.5191857275785878e-05, "loss_breakdown/pointer_loss": 0.28147047758102417, "step": 1580 }, { "epoch": 0.16111967980013, "loss_breakdown/lm_loss": 2.9936099963379093e-05, "loss_breakdown/pointer_loss": 0.15520954132080078, "step": 1580 }, { "epoch": 0.16111967980013, "loss_breakdown/lm_loss": 2.4933729946496896e-05, "loss_breakdown/pointer_loss": 0.5545946359634399, "step": 1580 }, { "epoch": 0.16111967980013, "loss_breakdown/lm_loss": 3.4811077057383955e-05, "loss_breakdown/pointer_loss": 0.7016113996505737, "step": 1580 }, { "epoch": 0.16111967980013, "loss_breakdown/lm_loss": 4.027645627502352e-05, "loss_breakdown/pointer_loss": 1.466350793838501, "step": 1580 }, { "epoch": 0.1621394246089916, "grad_norm": 5.1387589090958805, "learning_rate": 4.655524079320114e-06, "loss": 0.3315, "step": 1590 }, { "epoch": 0.1621394246089916, "loss_breakdown/lm_loss": 2.7267078621662222e-05, "loss_breakdown/pointer_loss": 0.645412027835846, "step": 1590 }, { "epoch": 0.1621394246089916, "loss_breakdown/lm_loss": 2.7125124688609503e-05, "loss_breakdown/pointer_loss": 0.3914022743701935, "step": 1590 }, { "epoch": 0.1621394246089916, "loss_breakdown/lm_loss": 2.5067834940273315e-05, "loss_breakdown/pointer_loss": 0.5826685428619385, "step": 1590 }, { "epoch": 0.1621394246089916, "loss_breakdown/lm_loss": 2.3538912500953302e-05, "loss_breakdown/pointer_loss": 0.20334574580192566, "step": 1590 }, { "epoch": 0.1621394246089916, "loss_breakdown/lm_loss": 2.4378714442718774e-05, "loss_breakdown/pointer_loss": 0.2552069425582886, "step": 1590 }, { "epoch": 0.1621394246089916, "loss_breakdown/lm_loss": 3.00857936963439e-05, "loss_breakdown/pointer_loss": 0.3150362968444824, "step": 1590 }, { "epoch": 0.1621394246089916, "loss_breakdown/lm_loss": 2.6807852918864228e-05, "loss_breakdown/pointer_loss": 0.17435182631015778, "step": 1590 }, { "epoch": 0.1621394246089916, "loss_breakdown/lm_loss": 2.7176536605111323e-05, "loss_breakdown/pointer_loss": 0.4106907844543457, "step": 1590 }, { "epoch": 0.1631591694178532, "grad_norm": 19.963563129458755, "learning_rate": 4.64985835694051e-06, "loss": 0.326, "step": 1600 }, { "epoch": 0.1631591694178532, "loss_breakdown/lm_loss": 0.0001465425593778491, "loss_breakdown/pointer_loss": 3.1016623973846436, "step": 1600 }, { "epoch": 0.1631591694178532, "loss_breakdown/lm_loss": 4.982956306776032e-05, "loss_breakdown/pointer_loss": 0.8555960655212402, "step": 1600 }, { "epoch": 0.1631591694178532, "loss_breakdown/lm_loss": 3.285283673903905e-05, "loss_breakdown/pointer_loss": 0.9341990947723389, "step": 1600 }, { "epoch": 0.1631591694178532, "loss_breakdown/lm_loss": 3.9310591091634706e-05, "loss_breakdown/pointer_loss": 1.2206776142120361, "step": 1600 }, { "epoch": 0.1631591694178532, "loss_breakdown/lm_loss": 3.3516105759190395e-05, "loss_breakdown/pointer_loss": 1.2437561750411987, "step": 1600 }, { "epoch": 0.1631591694178532, "loss_breakdown/lm_loss": 3.1419534934684634e-05, "loss_breakdown/pointer_loss": 0.6067065596580505, "step": 1600 }, { "epoch": 0.1631591694178532, "loss_breakdown/lm_loss": 4.392305709188804e-05, "loss_breakdown/pointer_loss": 1.0502111911773682, "step": 1600 }, { "epoch": 0.1631591694178532, "loss_breakdown/lm_loss": 3.0401844924199395e-05, "loss_breakdown/pointer_loss": 0.3982979953289032, "step": 1600 }, { "epoch": 0.16417891422671477, "grad_norm": 3.0316628330517097, "learning_rate": 4.644192634560907e-06, "loss": 0.3286, "step": 1610 }, { "epoch": 0.16417891422671477, "loss_breakdown/lm_loss": 2.7634021535050124e-05, "loss_breakdown/pointer_loss": 0.38519051671028137, "step": 1610 }, { "epoch": 0.16417891422671477, "loss_breakdown/lm_loss": 2.8260586987016723e-05, "loss_breakdown/pointer_loss": 0.4584828317165375, "step": 1610 }, { "epoch": 0.16417891422671477, "loss_breakdown/lm_loss": 2.9589986297651194e-05, "loss_breakdown/pointer_loss": 0.352197527885437, "step": 1610 }, { "epoch": 0.16417891422671477, "loss_breakdown/lm_loss": 2.3839511413825676e-05, "loss_breakdown/pointer_loss": 0.3585396707057953, "step": 1610 }, { "epoch": 0.16417891422671477, "loss_breakdown/lm_loss": 2.995781869685743e-05, "loss_breakdown/pointer_loss": 0.7214010953903198, "step": 1610 }, { "epoch": 0.16417891422671477, "loss_breakdown/lm_loss": 2.4460645363433287e-05, "loss_breakdown/pointer_loss": 0.6080294847488403, "step": 1610 }, { "epoch": 0.16417891422671477, "loss_breakdown/lm_loss": 2.349129499634728e-05, "loss_breakdown/pointer_loss": 1.8317410945892334, "step": 1610 }, { "epoch": 0.16417891422671477, "loss_breakdown/lm_loss": 2.352314186282456e-05, "loss_breakdown/pointer_loss": 0.5087023973464966, "step": 1610 }, { "epoch": 0.16519865903557635, "grad_norm": 2.60264347760952, "learning_rate": 4.638526912181304e-06, "loss": 0.3323, "step": 1620 }, { "epoch": 0.16519865903557635, "loss_breakdown/lm_loss": 6.140184996183962e-05, "loss_breakdown/pointer_loss": 0.4037538468837738, "step": 1620 }, { "epoch": 0.16519865903557635, "loss_breakdown/lm_loss": 4.093800816917792e-05, "loss_breakdown/pointer_loss": 0.5993201732635498, "step": 1620 }, { "epoch": 0.16519865903557635, "loss_breakdown/lm_loss": 3.508740337565541e-05, "loss_breakdown/pointer_loss": 0.5113203525543213, "step": 1620 }, { "epoch": 0.16519865903557635, "loss_breakdown/lm_loss": 2.804680661938619e-05, "loss_breakdown/pointer_loss": 1.6830592155456543, "step": 1620 }, { "epoch": 0.16519865903557635, "loss_breakdown/lm_loss": 3.329227547510527e-05, "loss_breakdown/pointer_loss": 0.46214890480041504, "step": 1620 }, { "epoch": 0.16519865903557635, "loss_breakdown/lm_loss": 3.5356068110559136e-05, "loss_breakdown/pointer_loss": 0.46778666973114014, "step": 1620 }, { "epoch": 0.16519865903557635, "loss_breakdown/lm_loss": 3.733851190190762e-05, "loss_breakdown/pointer_loss": 0.8459805846214294, "step": 1620 }, { "epoch": 0.16519865903557635, "loss_breakdown/lm_loss": 3.098143861279823e-05, "loss_breakdown/pointer_loss": 0.5433122515678406, "step": 1620 }, { "epoch": 0.16621840384443792, "grad_norm": 9.839318668181901, "learning_rate": 4.6328611898017005e-06, "loss": 0.2984, "step": 1630 }, { "epoch": 0.16621840384443792, "loss_breakdown/lm_loss": 3.336843292345293e-05, "loss_breakdown/pointer_loss": 0.19088034331798553, "step": 1630 }, { "epoch": 0.16621840384443792, "loss_breakdown/lm_loss": 2.3066122594173066e-05, "loss_breakdown/pointer_loss": 0.26395171880722046, "step": 1630 }, { "epoch": 0.16621840384443792, "loss_breakdown/lm_loss": 2.891066651500296e-05, "loss_breakdown/pointer_loss": 0.32130229473114014, "step": 1630 }, { "epoch": 0.16621840384443792, "loss_breakdown/lm_loss": 3.177954567945562e-05, "loss_breakdown/pointer_loss": 0.22513985633850098, "step": 1630 }, { "epoch": 0.16621840384443792, "loss_breakdown/lm_loss": 2.3145485101849772e-05, "loss_breakdown/pointer_loss": 0.3831093907356262, "step": 1630 }, { "epoch": 0.16621840384443792, "loss_breakdown/lm_loss": 3.9903843571664765e-05, "loss_breakdown/pointer_loss": 0.2511688768863678, "step": 1630 }, { "epoch": 0.16621840384443792, "loss_breakdown/lm_loss": 3.4572640288388357e-05, "loss_breakdown/pointer_loss": 0.4741383194923401, "step": 1630 }, { "epoch": 0.16621840384443792, "loss_breakdown/lm_loss": 2.9105483918101527e-05, "loss_breakdown/pointer_loss": 0.3720557391643524, "step": 1630 }, { "epoch": 0.1672381486532995, "grad_norm": 2.531280528441712, "learning_rate": 4.627195467422096e-06, "loss": 0.3439, "step": 1640 }, { "epoch": 0.1672381486532995, "loss_breakdown/lm_loss": 3.4819680877262726e-05, "loss_breakdown/pointer_loss": 0.5322779417037964, "step": 1640 }, { "epoch": 0.1672381486532995, "loss_breakdown/lm_loss": 2.2392398022930138e-05, "loss_breakdown/pointer_loss": 1.022498369216919, "step": 1640 }, { "epoch": 0.1672381486532995, "loss_breakdown/lm_loss": 2.7050275093642995e-05, "loss_breakdown/pointer_loss": 0.23388972878456116, "step": 1640 }, { "epoch": 0.1672381486532995, "loss_breakdown/lm_loss": 2.4507322450517677e-05, "loss_breakdown/pointer_loss": 0.3927001655101776, "step": 1640 }, { "epoch": 0.1672381486532995, "loss_breakdown/lm_loss": 2.8246162401046604e-05, "loss_breakdown/pointer_loss": 0.6692107915878296, "step": 1640 }, { "epoch": 0.1672381486532995, "loss_breakdown/lm_loss": 2.3414335373672657e-05, "loss_breakdown/pointer_loss": 0.54820317029953, "step": 1640 }, { "epoch": 0.1672381486532995, "loss_breakdown/lm_loss": 2.356795630475972e-05, "loss_breakdown/pointer_loss": 0.2642894387245178, "step": 1640 }, { "epoch": 0.1672381486532995, "loss_breakdown/lm_loss": 2.3948075977386907e-05, "loss_breakdown/pointer_loss": 0.1133878082036972, "step": 1640 }, { "epoch": 0.1682578934621611, "grad_norm": 14.32386869851656, "learning_rate": 4.6215297450424935e-06, "loss": 0.3271, "step": 1650 }, { "epoch": 0.1682578934621611, "loss_breakdown/lm_loss": 0.00031795012182556093, "loss_breakdown/pointer_loss": 1.6551762819290161, "step": 1650 }, { "epoch": 0.1682578934621611, "loss_breakdown/lm_loss": 5.432866237242706e-05, "loss_breakdown/pointer_loss": 0.5953344106674194, "step": 1650 }, { "epoch": 0.1682578934621611, "loss_breakdown/lm_loss": 4.085575710632838e-05, "loss_breakdown/pointer_loss": 0.7365550994873047, "step": 1650 }, { "epoch": 0.1682578934621611, "loss_breakdown/lm_loss": 3.5376731830183417e-05, "loss_breakdown/pointer_loss": 0.4025494456291199, "step": 1650 }, { "epoch": 0.1682578934621611, "loss_breakdown/lm_loss": 5.171902012079954e-05, "loss_breakdown/pointer_loss": 1.3629711866378784, "step": 1650 }, { "epoch": 0.1682578934621611, "loss_breakdown/lm_loss": 5.2321876864880323e-05, "loss_breakdown/pointer_loss": 1.0640264749526978, "step": 1650 }, { "epoch": 0.1682578934621611, "loss_breakdown/lm_loss": 3.953030682168901e-05, "loss_breakdown/pointer_loss": 0.4968433976173401, "step": 1650 }, { "epoch": 0.1682578934621611, "loss_breakdown/lm_loss": 3.353587817400694e-05, "loss_breakdown/pointer_loss": 0.830254077911377, "step": 1650 }, { "epoch": 0.16927763827102268, "grad_norm": 4.483308615505154, "learning_rate": 4.61586402266289e-06, "loss": 0.3172, "step": 1660 }, { "epoch": 0.16927763827102268, "loss_breakdown/lm_loss": 2.608764225442428e-05, "loss_breakdown/pointer_loss": 0.3491383194923401, "step": 1660 }, { "epoch": 0.16927763827102268, "loss_breakdown/lm_loss": 2.6156656531384215e-05, "loss_breakdown/pointer_loss": 0.31167763471603394, "step": 1660 }, { "epoch": 0.16927763827102268, "loss_breakdown/lm_loss": 2.9193413865868933e-05, "loss_breakdown/pointer_loss": 0.18840822577476501, "step": 1660 }, { "epoch": 0.16927763827102268, "loss_breakdown/lm_loss": 2.5587827622075565e-05, "loss_breakdown/pointer_loss": 0.24367529153823853, "step": 1660 }, { "epoch": 0.16927763827102268, "loss_breakdown/lm_loss": 2.6353343855589628e-05, "loss_breakdown/pointer_loss": 0.31660664081573486, "step": 1660 }, { "epoch": 0.16927763827102268, "loss_breakdown/lm_loss": 4.560077650239691e-05, "loss_breakdown/pointer_loss": 0.4633029103279114, "step": 1660 }, { "epoch": 0.16927763827102268, "loss_breakdown/lm_loss": 2.7281776056042872e-05, "loss_breakdown/pointer_loss": 2.191678762435913, "step": 1660 }, { "epoch": 0.16927763827102268, "loss_breakdown/lm_loss": 6.494391709566116e-05, "loss_breakdown/pointer_loss": 0.3511796295642853, "step": 1660 }, { "epoch": 0.17029738307988426, "grad_norm": 3.009690464679745, "learning_rate": 4.6101983002832865e-06, "loss": 0.3186, "step": 1670 }, { "epoch": 0.17029738307988426, "loss_breakdown/lm_loss": 3.44548279826995e-05, "loss_breakdown/pointer_loss": 0.16194751858711243, "step": 1670 }, { "epoch": 0.17029738307988426, "loss_breakdown/lm_loss": 2.4998555090860464e-05, "loss_breakdown/pointer_loss": 0.6079349517822266, "step": 1670 }, { "epoch": 0.17029738307988426, "loss_breakdown/lm_loss": 2.44200236920733e-05, "loss_breakdown/pointer_loss": 0.2119612842798233, "step": 1670 }, { "epoch": 0.17029738307988426, "loss_breakdown/lm_loss": 3.3947078918572515e-05, "loss_breakdown/pointer_loss": 0.5309884548187256, "step": 1670 }, { "epoch": 0.17029738307988426, "loss_breakdown/lm_loss": 2.8076199669158086e-05, "loss_breakdown/pointer_loss": 0.8118985891342163, "step": 1670 }, { "epoch": 0.17029738307988426, "loss_breakdown/lm_loss": 2.8604734325199388e-05, "loss_breakdown/pointer_loss": 0.29823431372642517, "step": 1670 }, { "epoch": 0.17029738307988426, "loss_breakdown/lm_loss": 2.8115651730331592e-05, "loss_breakdown/pointer_loss": 1.1929291486740112, "step": 1670 }, { "epoch": 0.17029738307988426, "loss_breakdown/lm_loss": 3.115513391094282e-05, "loss_breakdown/pointer_loss": 0.3350711464881897, "step": 1670 }, { "epoch": 0.17131712788874584, "grad_norm": 6.218422793374475, "learning_rate": 4.604532577903683e-06, "loss": 0.3048, "step": 1680 }, { "epoch": 0.17131712788874584, "loss_breakdown/lm_loss": 3.4256252547493204e-05, "loss_breakdown/pointer_loss": 1.6710517406463623, "step": 1680 }, { "epoch": 0.17131712788874584, "loss_breakdown/lm_loss": 3.426244802540168e-05, "loss_breakdown/pointer_loss": 4.295599937438965, "step": 1680 }, { "epoch": 0.17131712788874584, "loss_breakdown/lm_loss": 6.477170973084867e-05, "loss_breakdown/pointer_loss": 0.2874221205711365, "step": 1680 }, { "epoch": 0.17131712788874584, "loss_breakdown/lm_loss": 3.5605800803750753e-05, "loss_breakdown/pointer_loss": 0.7801831960678101, "step": 1680 }, { "epoch": 0.17131712788874584, "loss_breakdown/lm_loss": 2.641152786964085e-05, "loss_breakdown/pointer_loss": 0.06195729225873947, "step": 1680 }, { "epoch": 0.17131712788874584, "loss_breakdown/lm_loss": 3.1656316423323005e-05, "loss_breakdown/pointer_loss": 0.26111823320388794, "step": 1680 }, { "epoch": 0.17131712788874584, "loss_breakdown/lm_loss": 2.6328129024477676e-05, "loss_breakdown/pointer_loss": 7.874295234680176, "step": 1680 }, { "epoch": 0.17131712788874584, "loss_breakdown/lm_loss": 3.3070198696805164e-05, "loss_breakdown/pointer_loss": 0.09017951786518097, "step": 1680 }, { "epoch": 0.17233687269760742, "grad_norm": 4.840973424090649, "learning_rate": 4.5988668555240795e-06, "loss": 0.3502, "step": 1690 }, { "epoch": 0.17233687269760742, "loss_breakdown/lm_loss": 2.4374970962526277e-05, "loss_breakdown/pointer_loss": 0.2679020166397095, "step": 1690 }, { "epoch": 0.17233687269760742, "loss_breakdown/lm_loss": 3.629170896601863e-05, "loss_breakdown/pointer_loss": 0.26239532232284546, "step": 1690 }, { "epoch": 0.17233687269760742, "loss_breakdown/lm_loss": 2.549805140006356e-05, "loss_breakdown/pointer_loss": 0.6307716369628906, "step": 1690 }, { "epoch": 0.17233687269760742, "loss_breakdown/lm_loss": 2.3766066078678705e-05, "loss_breakdown/pointer_loss": 1.8283179998397827, "step": 1690 }, { "epoch": 0.17233687269760742, "loss_breakdown/lm_loss": 3.605850724852644e-05, "loss_breakdown/pointer_loss": 0.2639022171497345, "step": 1690 }, { "epoch": 0.17233687269760742, "loss_breakdown/lm_loss": 2.6853560484596528e-05, "loss_breakdown/pointer_loss": 0.7628446221351624, "step": 1690 }, { "epoch": 0.17233687269760742, "loss_breakdown/lm_loss": 2.8487407689681277e-05, "loss_breakdown/pointer_loss": 0.25746864080429077, "step": 1690 }, { "epoch": 0.17233687269760742, "loss_breakdown/lm_loss": 2.185734956583474e-05, "loss_breakdown/pointer_loss": 0.2795686721801758, "step": 1690 }, { "epoch": 0.173356617506469, "grad_norm": 6.303165594595121, "learning_rate": 4.593201133144477e-06, "loss": 0.3315, "step": 1700 }, { "epoch": 0.173356617506469, "loss_breakdown/lm_loss": 8.13374063000083e-05, "loss_breakdown/pointer_loss": 1.3823491334915161, "step": 1700 }, { "epoch": 0.173356617506469, "loss_breakdown/lm_loss": 7.23554621799849e-05, "loss_breakdown/pointer_loss": 0.5456899404525757, "step": 1700 }, { "epoch": 0.173356617506469, "loss_breakdown/lm_loss": 5.4886782891117036e-05, "loss_breakdown/pointer_loss": 0.398610919713974, "step": 1700 }, { "epoch": 0.173356617506469, "loss_breakdown/lm_loss": 3.020523399754893e-05, "loss_breakdown/pointer_loss": 0.585091233253479, "step": 1700 }, { "epoch": 0.173356617506469, "loss_breakdown/lm_loss": 2.703313839447219e-05, "loss_breakdown/pointer_loss": 0.7420447468757629, "step": 1700 }, { "epoch": 0.173356617506469, "loss_breakdown/lm_loss": 2.5980643840739504e-05, "loss_breakdown/pointer_loss": 0.3487510085105896, "step": 1700 }, { "epoch": 0.173356617506469, "loss_breakdown/lm_loss": 3.4658292861422524e-05, "loss_breakdown/pointer_loss": 0.6508355736732483, "step": 1700 }, { "epoch": 0.173356617506469, "loss_breakdown/lm_loss": 2.2981297661317512e-05, "loss_breakdown/pointer_loss": 0.46385788917541504, "step": 1700 }, { "epoch": 0.1743763623153306, "grad_norm": 3.5132436702214296, "learning_rate": 4.5875354107648725e-06, "loss": 0.3193, "step": 1710 }, { "epoch": 0.1743763623153306, "loss_breakdown/lm_loss": 2.2043723220122047e-05, "loss_breakdown/pointer_loss": 0.2765263319015503, "step": 1710 }, { "epoch": 0.1743763623153306, "loss_breakdown/lm_loss": 1.9615226847236045e-05, "loss_breakdown/pointer_loss": 0.1514938771724701, "step": 1710 }, { "epoch": 0.1743763623153306, "loss_breakdown/lm_loss": 2.1386935259215534e-05, "loss_breakdown/pointer_loss": 0.19685447216033936, "step": 1710 }, { "epoch": 0.1743763623153306, "loss_breakdown/lm_loss": 2.3618275008630008e-05, "loss_breakdown/pointer_loss": 0.2901071012020111, "step": 1710 }, { "epoch": 0.1743763623153306, "loss_breakdown/lm_loss": 3.0293207601062022e-05, "loss_breakdown/pointer_loss": 0.22770512104034424, "step": 1710 }, { "epoch": 0.1743763623153306, "loss_breakdown/lm_loss": 2.053110620181542e-05, "loss_breakdown/pointer_loss": 2.722087860107422, "step": 1710 }, { "epoch": 0.1743763623153306, "loss_breakdown/lm_loss": 2.463562850607559e-05, "loss_breakdown/pointer_loss": 0.11967647820711136, "step": 1710 }, { "epoch": 0.1743763623153306, "loss_breakdown/lm_loss": 3.61619058821816e-05, "loss_breakdown/pointer_loss": 0.13296079635620117, "step": 1710 }, { "epoch": 0.17539610712419218, "grad_norm": 7.2110103686150016, "learning_rate": 4.581869688385269e-06, "loss": 0.3498, "step": 1720 }, { "epoch": 0.17539610712419218, "loss_breakdown/lm_loss": 3.787845707847737e-05, "loss_breakdown/pointer_loss": 0.5333424210548401, "step": 1720 }, { "epoch": 0.17539610712419218, "loss_breakdown/lm_loss": 3.268328509875573e-05, "loss_breakdown/pointer_loss": 0.26496344804763794, "step": 1720 }, { "epoch": 0.17539610712419218, "loss_breakdown/lm_loss": 3.544617720763199e-05, "loss_breakdown/pointer_loss": 0.5464821457862854, "step": 1720 }, { "epoch": 0.17539610712419218, "loss_breakdown/lm_loss": 2.7783380573964678e-05, "loss_breakdown/pointer_loss": 0.746652364730835, "step": 1720 }, { "epoch": 0.17539610712419218, "loss_breakdown/lm_loss": 3.6629073292715475e-05, "loss_breakdown/pointer_loss": 0.41721901297569275, "step": 1720 }, { "epoch": 0.17539610712419218, "loss_breakdown/lm_loss": 3.4222084650537e-05, "loss_breakdown/pointer_loss": 0.7299100160598755, "step": 1720 }, { "epoch": 0.17539610712419218, "loss_breakdown/lm_loss": 4.603346314979717e-05, "loss_breakdown/pointer_loss": 0.4260615408420563, "step": 1720 }, { "epoch": 0.17539610712419218, "loss_breakdown/lm_loss": 3.183272565365769e-05, "loss_breakdown/pointer_loss": 0.5009092688560486, "step": 1720 }, { "epoch": 0.17641585193305376, "grad_norm": 124.2399566173907, "learning_rate": 4.576203966005666e-06, "loss": 0.3007, "step": 1730 }, { "epoch": 0.17641585193305376, "loss_breakdown/lm_loss": 2.304619010828901e-05, "loss_breakdown/pointer_loss": 0.4090668857097626, "step": 1730 }, { "epoch": 0.17641585193305376, "loss_breakdown/lm_loss": 7.89019904914312e-05, "loss_breakdown/pointer_loss": 3.8535683155059814, "step": 1730 }, { "epoch": 0.17641585193305376, "loss_breakdown/lm_loss": 3.019829819095321e-05, "loss_breakdown/pointer_loss": 0.5175714492797852, "step": 1730 }, { "epoch": 0.17641585193305376, "loss_breakdown/lm_loss": 5.1264101784909144e-05, "loss_breakdown/pointer_loss": 0.5636700987815857, "step": 1730 }, { "epoch": 0.17641585193305376, "loss_breakdown/lm_loss": 3.313431079732254e-05, "loss_breakdown/pointer_loss": 0.7207555770874023, "step": 1730 }, { "epoch": 0.17641585193305376, "loss_breakdown/lm_loss": 2.10714806598844e-05, "loss_breakdown/pointer_loss": 0.13446654379367828, "step": 1730 }, { "epoch": 0.17641585193305376, "loss_breakdown/lm_loss": 2.4488545022904873e-05, "loss_breakdown/pointer_loss": 0.17400772869586945, "step": 1730 }, { "epoch": 0.17641585193305376, "loss_breakdown/lm_loss": 3.591058703023009e-05, "loss_breakdown/pointer_loss": 0.12230294942855835, "step": 1730 }, { "epoch": 0.17743559674191534, "grad_norm": 7.801812270004489, "learning_rate": 4.570538243626063e-06, "loss": 0.332, "step": 1740 }, { "epoch": 0.17743559674191534, "loss_breakdown/lm_loss": 3.437600753386505e-05, "loss_breakdown/pointer_loss": 0.868517279624939, "step": 1740 }, { "epoch": 0.17743559674191534, "loss_breakdown/lm_loss": 2.5385892513440922e-05, "loss_breakdown/pointer_loss": 0.5112552046775818, "step": 1740 }, { "epoch": 0.17743559674191534, "loss_breakdown/lm_loss": 3.441238004597835e-05, "loss_breakdown/pointer_loss": 0.2573046088218689, "step": 1740 }, { "epoch": 0.17743559674191534, "loss_breakdown/lm_loss": 3.5326098441146314e-05, "loss_breakdown/pointer_loss": 0.6964327692985535, "step": 1740 }, { "epoch": 0.17743559674191534, "loss_breakdown/lm_loss": 2.9327295123948716e-05, "loss_breakdown/pointer_loss": 0.5269364714622498, "step": 1740 }, { "epoch": 0.17743559674191534, "loss_breakdown/lm_loss": 2.2520593120134436e-05, "loss_breakdown/pointer_loss": 0.5396877527236938, "step": 1740 }, { "epoch": 0.17743559674191534, "loss_breakdown/lm_loss": 2.830786252161488e-05, "loss_breakdown/pointer_loss": 0.22593079507350922, "step": 1740 }, { "epoch": 0.17743559674191534, "loss_breakdown/lm_loss": 2.093843067996204e-05, "loss_breakdown/pointer_loss": 0.1860969066619873, "step": 1740 }, { "epoch": 0.1784553415507769, "grad_norm": 7.969187614716842, "learning_rate": 4.564872521246459e-06, "loss": 0.3266, "step": 1750 }, { "epoch": 0.1784553415507769, "loss_breakdown/lm_loss": 0.00019090868590865284, "loss_breakdown/pointer_loss": 1.7418324947357178, "step": 1750 }, { "epoch": 0.1784553415507769, "loss_breakdown/lm_loss": 5.406688433140516e-05, "loss_breakdown/pointer_loss": 1.1333624124526978, "step": 1750 }, { "epoch": 0.1784553415507769, "loss_breakdown/lm_loss": 6.686912092845887e-05, "loss_breakdown/pointer_loss": 0.8553016781806946, "step": 1750 }, { "epoch": 0.1784553415507769, "loss_breakdown/lm_loss": 5.7001969253178686e-05, "loss_breakdown/pointer_loss": 0.4212201237678528, "step": 1750 }, { "epoch": 0.1784553415507769, "loss_breakdown/lm_loss": 4.5225489884614944e-05, "loss_breakdown/pointer_loss": 0.6441062092781067, "step": 1750 }, { "epoch": 0.1784553415507769, "loss_breakdown/lm_loss": 7.962311065057293e-05, "loss_breakdown/pointer_loss": 0.5983664989471436, "step": 1750 }, { "epoch": 0.1784553415507769, "loss_breakdown/lm_loss": 4.525177428149618e-05, "loss_breakdown/pointer_loss": 0.531047523021698, "step": 1750 }, { "epoch": 0.1784553415507769, "loss_breakdown/lm_loss": 3.5789318644674495e-05, "loss_breakdown/pointer_loss": 0.3914738595485687, "step": 1750 }, { "epoch": 0.1794750863596385, "grad_norm": 4.321891818759699, "learning_rate": 4.559206798866856e-06, "loss": 0.3193, "step": 1760 }, { "epoch": 0.1794750863596385, "loss_breakdown/lm_loss": 2.2360123693943024e-05, "loss_breakdown/pointer_loss": 0.49936002492904663, "step": 1760 }, { "epoch": 0.1794750863596385, "loss_breakdown/lm_loss": 2.7355554266250692e-05, "loss_breakdown/pointer_loss": 0.1937103122472763, "step": 1760 }, { "epoch": 0.1794750863596385, "loss_breakdown/lm_loss": 2.3223203243105672e-05, "loss_breakdown/pointer_loss": 0.3344340920448303, "step": 1760 }, { "epoch": 0.1794750863596385, "loss_breakdown/lm_loss": 2.012765980907716e-05, "loss_breakdown/pointer_loss": 0.3553716540336609, "step": 1760 }, { "epoch": 0.1794750863596385, "loss_breakdown/lm_loss": 4.9044763727579266e-05, "loss_breakdown/pointer_loss": 2.4918439388275146, "step": 1760 }, { "epoch": 0.1794750863596385, "loss_breakdown/lm_loss": 2.3369284463115036e-05, "loss_breakdown/pointer_loss": 0.3373172879219055, "step": 1760 }, { "epoch": 0.1794750863596385, "loss_breakdown/lm_loss": 5.641719326376915e-05, "loss_breakdown/pointer_loss": 2.656860113143921, "step": 1760 }, { "epoch": 0.1794750863596385, "loss_breakdown/lm_loss": 2.324070010217838e-05, "loss_breakdown/pointer_loss": 0.2909386157989502, "step": 1760 }, { "epoch": 0.18049483116850007, "grad_norm": 3.3422849180987932, "learning_rate": 4.553541076487252e-06, "loss": 0.3378, "step": 1770 }, { "epoch": 0.18049483116850007, "loss_breakdown/lm_loss": 2.9139682737877592e-05, "loss_breakdown/pointer_loss": 0.4784301817417145, "step": 1770 }, { "epoch": 0.18049483116850007, "loss_breakdown/lm_loss": 4.891329081146978e-05, "loss_breakdown/pointer_loss": 0.3439498543739319, "step": 1770 }, { "epoch": 0.18049483116850007, "loss_breakdown/lm_loss": 2.2252192138694227e-05, "loss_breakdown/pointer_loss": 0.6611537337303162, "step": 1770 }, { "epoch": 0.18049483116850007, "loss_breakdown/lm_loss": 2.4626751837786287e-05, "loss_breakdown/pointer_loss": 0.252733051776886, "step": 1770 }, { "epoch": 0.18049483116850007, "loss_breakdown/lm_loss": 2.413166657788679e-05, "loss_breakdown/pointer_loss": 0.47764527797698975, "step": 1770 }, { "epoch": 0.18049483116850007, "loss_breakdown/lm_loss": 3.0344606784638017e-05, "loss_breakdown/pointer_loss": 0.36125481128692627, "step": 1770 }, { "epoch": 0.18049483116850007, "loss_breakdown/lm_loss": 2.179580769734457e-05, "loss_breakdown/pointer_loss": 0.2227678745985031, "step": 1770 }, { "epoch": 0.18049483116850007, "loss_breakdown/lm_loss": 3.090415339102037e-05, "loss_breakdown/pointer_loss": 0.7152037620544434, "step": 1770 }, { "epoch": 0.18151457597736168, "grad_norm": 9.132597559102958, "learning_rate": 4.547875354107649e-06, "loss": 0.3147, "step": 1780 }, { "epoch": 0.18151457597736168, "loss_breakdown/lm_loss": 2.672534537850879e-05, "loss_breakdown/pointer_loss": 2.5654165744781494, "step": 1780 }, { "epoch": 0.18151457597736168, "loss_breakdown/lm_loss": 2.503282121324446e-05, "loss_breakdown/pointer_loss": 0.4727647006511688, "step": 1780 }, { "epoch": 0.18151457597736168, "loss_breakdown/lm_loss": 4.416484807734378e-05, "loss_breakdown/pointer_loss": 0.14154788851737976, "step": 1780 }, { "epoch": 0.18151457597736168, "loss_breakdown/lm_loss": 2.6614176022121683e-05, "loss_breakdown/pointer_loss": 0.154435396194458, "step": 1780 }, { "epoch": 0.18151457597736168, "loss_breakdown/lm_loss": 4.437805182533339e-05, "loss_breakdown/pointer_loss": 0.1776142418384552, "step": 1780 }, { "epoch": 0.18151457597736168, "loss_breakdown/lm_loss": 8.873140177456662e-05, "loss_breakdown/pointer_loss": 0.353069543838501, "step": 1780 }, { "epoch": 0.18151457597736168, "loss_breakdown/lm_loss": 3.853721864288673e-05, "loss_breakdown/pointer_loss": 3.6349072456359863, "step": 1780 }, { "epoch": 0.18151457597736168, "loss_breakdown/lm_loss": 2.989594213431701e-05, "loss_breakdown/pointer_loss": 0.07720809429883957, "step": 1780 }, { "epoch": 0.18253432078622325, "grad_norm": 2.8046722408991447, "learning_rate": 4.542209631728045e-06, "loss": 0.3277, "step": 1790 }, { "epoch": 0.18253432078622325, "loss_breakdown/lm_loss": 3.089140955125913e-05, "loss_breakdown/pointer_loss": 0.1860784888267517, "step": 1790 }, { "epoch": 0.18253432078622325, "loss_breakdown/lm_loss": 3.278114309068769e-05, "loss_breakdown/pointer_loss": 0.5569249987602234, "step": 1790 }, { "epoch": 0.18253432078622325, "loss_breakdown/lm_loss": 2.683364800759591e-05, "loss_breakdown/pointer_loss": 1.6415820121765137, "step": 1790 }, { "epoch": 0.18253432078622325, "loss_breakdown/lm_loss": 3.18845413858071e-05, "loss_breakdown/pointer_loss": 0.6909919381141663, "step": 1790 }, { "epoch": 0.18253432078622325, "loss_breakdown/lm_loss": 5.427906580734998e-05, "loss_breakdown/pointer_loss": 0.7204269766807556, "step": 1790 }, { "epoch": 0.18253432078622325, "loss_breakdown/lm_loss": 3.74643350369297e-05, "loss_breakdown/pointer_loss": 0.4697082042694092, "step": 1790 }, { "epoch": 0.18253432078622325, "loss_breakdown/lm_loss": 2.9131660994607955e-05, "loss_breakdown/pointer_loss": 0.29731279611587524, "step": 1790 }, { "epoch": 0.18253432078622325, "loss_breakdown/lm_loss": 2.1544767150771804e-05, "loss_breakdown/pointer_loss": 0.32076865434646606, "step": 1790 }, { "epoch": 0.18355406559508483, "grad_norm": 8.030647912468043, "learning_rate": 4.536543909348442e-06, "loss": 0.3077, "step": 1800 }, { "epoch": 0.18355406559508483, "loss_breakdown/lm_loss": 0.00022441314649768174, "loss_breakdown/pointer_loss": 1.3671174049377441, "step": 1800 }, { "epoch": 0.18355406559508483, "loss_breakdown/lm_loss": 9.468508505960926e-05, "loss_breakdown/pointer_loss": 0.9885131120681763, "step": 1800 }, { "epoch": 0.18355406559508483, "loss_breakdown/lm_loss": 6.204012606758624e-05, "loss_breakdown/pointer_loss": 0.9929527640342712, "step": 1800 }, { "epoch": 0.18355406559508483, "loss_breakdown/lm_loss": 5.8699479268398136e-05, "loss_breakdown/pointer_loss": 0.9109125137329102, "step": 1800 }, { "epoch": 0.18355406559508483, "loss_breakdown/lm_loss": 6.492719694506377e-05, "loss_breakdown/pointer_loss": 0.39027079939842224, "step": 1800 }, { "epoch": 0.18355406559508483, "loss_breakdown/lm_loss": 3.8989546737866476e-05, "loss_breakdown/pointer_loss": 0.8954788446426392, "step": 1800 }, { "epoch": 0.18355406559508483, "loss_breakdown/lm_loss": 2.6906149287242442e-05, "loss_breakdown/pointer_loss": 0.7468400597572327, "step": 1800 }, { "epoch": 0.18355406559508483, "loss_breakdown/lm_loss": 4.510437065619044e-05, "loss_breakdown/pointer_loss": 0.4627777636051178, "step": 1800 }, { "epoch": 0.1845738104039464, "grad_norm": 6.015826493111002, "learning_rate": 4.530878186968839e-06, "loss": 0.3169, "step": 1810 }, { "epoch": 0.1845738104039464, "loss_breakdown/lm_loss": 2.4971419406938367e-05, "loss_breakdown/pointer_loss": 0.4846924841403961, "step": 1810 }, { "epoch": 0.1845738104039464, "loss_breakdown/lm_loss": 3.3033069485099986e-05, "loss_breakdown/pointer_loss": 0.7621488571166992, "step": 1810 }, { "epoch": 0.1845738104039464, "loss_breakdown/lm_loss": 3.7442521716002375e-05, "loss_breakdown/pointer_loss": 2.571707010269165, "step": 1810 }, { "epoch": 0.1845738104039464, "loss_breakdown/lm_loss": 3.3920136047527194e-05, "loss_breakdown/pointer_loss": 0.7290680408477783, "step": 1810 }, { "epoch": 0.1845738104039464, "loss_breakdown/lm_loss": 4.3057214497821406e-05, "loss_breakdown/pointer_loss": 0.43975967168807983, "step": 1810 }, { "epoch": 0.1845738104039464, "loss_breakdown/lm_loss": 2.9647097107954323e-05, "loss_breakdown/pointer_loss": 0.3492507338523865, "step": 1810 }, { "epoch": 0.1845738104039464, "loss_breakdown/lm_loss": 3.180330168106593e-05, "loss_breakdown/pointer_loss": 3.4944376945495605, "step": 1810 }, { "epoch": 0.1845738104039464, "loss_breakdown/lm_loss": 3.269325679866597e-05, "loss_breakdown/pointer_loss": 0.7332383394241333, "step": 1810 }, { "epoch": 0.185593555212808, "grad_norm": 5.470780302285162, "learning_rate": 4.525212464589236e-06, "loss": 0.3657, "step": 1820 }, { "epoch": 0.185593555212808, "loss_breakdown/lm_loss": 3.441575972829014e-05, "loss_breakdown/pointer_loss": 0.6421316266059875, "step": 1820 }, { "epoch": 0.185593555212808, "loss_breakdown/lm_loss": 3.285002094344236e-05, "loss_breakdown/pointer_loss": 0.41291022300720215, "step": 1820 }, { "epoch": 0.185593555212808, "loss_breakdown/lm_loss": 3.772281343117356e-05, "loss_breakdown/pointer_loss": 0.19142165780067444, "step": 1820 }, { "epoch": 0.185593555212808, "loss_breakdown/lm_loss": 3.217583434889093e-05, "loss_breakdown/pointer_loss": 0.5837001800537109, "step": 1820 }, { "epoch": 0.185593555212808, "loss_breakdown/lm_loss": 4.633217031368986e-05, "loss_breakdown/pointer_loss": 0.3939131200313568, "step": 1820 }, { "epoch": 0.185593555212808, "loss_breakdown/lm_loss": 3.671102604130283e-05, "loss_breakdown/pointer_loss": 1.0324187278747559, "step": 1820 }, { "epoch": 0.185593555212808, "loss_breakdown/lm_loss": 3.383955481695011e-05, "loss_breakdown/pointer_loss": 0.5948551893234253, "step": 1820 }, { "epoch": 0.185593555212808, "loss_breakdown/lm_loss": 4.78762412967626e-05, "loss_breakdown/pointer_loss": 0.3151084780693054, "step": 1820 }, { "epoch": 0.18661330002166956, "grad_norm": 11.2456287968307, "learning_rate": 4.519546742209632e-06, "loss": 0.3147, "step": 1830 }, { "epoch": 0.18661330002166956, "loss_breakdown/lm_loss": 2.711868000915274e-05, "loss_breakdown/pointer_loss": 0.10949347913265228, "step": 1830 }, { "epoch": 0.18661330002166956, "loss_breakdown/lm_loss": 2.7782281904364936e-05, "loss_breakdown/pointer_loss": 0.27300167083740234, "step": 1830 }, { "epoch": 0.18661330002166956, "loss_breakdown/lm_loss": 2.5755924070836045e-05, "loss_breakdown/pointer_loss": 0.418163001537323, "step": 1830 }, { "epoch": 0.18661330002166956, "loss_breakdown/lm_loss": 2.5740180717548355e-05, "loss_breakdown/pointer_loss": 4.792580604553223, "step": 1830 }, { "epoch": 0.18661330002166956, "loss_breakdown/lm_loss": 3.8728383515262976e-05, "loss_breakdown/pointer_loss": 0.10053230822086334, "step": 1830 }, { "epoch": 0.18661330002166956, "loss_breakdown/lm_loss": 2.8716092856484465e-05, "loss_breakdown/pointer_loss": 0.09387657046318054, "step": 1830 }, { "epoch": 0.18661330002166956, "loss_breakdown/lm_loss": 2.5016988729475997e-05, "loss_breakdown/pointer_loss": 0.41398942470550537, "step": 1830 }, { "epoch": 0.18661330002166956, "loss_breakdown/lm_loss": 6.983638741075993e-05, "loss_breakdown/pointer_loss": 0.44981539249420166, "step": 1830 }, { "epoch": 0.18763304483053117, "grad_norm": 4.599979268958567, "learning_rate": 4.513881019830029e-06, "loss": 0.3504, "step": 1840 }, { "epoch": 0.18763304483053117, "loss_breakdown/lm_loss": 4.4765034544980153e-05, "loss_breakdown/pointer_loss": 1.2344326972961426, "step": 1840 }, { "epoch": 0.18763304483053117, "loss_breakdown/lm_loss": 3.453663885011338e-05, "loss_breakdown/pointer_loss": 0.943477988243103, "step": 1840 }, { "epoch": 0.18763304483053117, "loss_breakdown/lm_loss": 3.592349457903765e-05, "loss_breakdown/pointer_loss": 0.31739842891693115, "step": 1840 }, { "epoch": 0.18763304483053117, "loss_breakdown/lm_loss": 2.9902310416218825e-05, "loss_breakdown/pointer_loss": 0.10140784084796906, "step": 1840 }, { "epoch": 0.18763304483053117, "loss_breakdown/lm_loss": 2.56446855928516e-05, "loss_breakdown/pointer_loss": 0.1564898043870926, "step": 1840 }, { "epoch": 0.18763304483053117, "loss_breakdown/lm_loss": 2.81481279671425e-05, "loss_breakdown/pointer_loss": 0.3283959925174713, "step": 1840 }, { "epoch": 0.18763304483053117, "loss_breakdown/lm_loss": 2.9789180189254694e-05, "loss_breakdown/pointer_loss": 0.7157142758369446, "step": 1840 }, { "epoch": 0.18763304483053117, "loss_breakdown/lm_loss": 5.72860190004576e-05, "loss_breakdown/pointer_loss": 0.44420093297958374, "step": 1840 }, { "epoch": 0.18865278963939275, "grad_norm": 13.071286654645576, "learning_rate": 4.508215297450425e-06, "loss": 0.3162, "step": 1850 }, { "epoch": 0.18865278963939275, "loss_breakdown/lm_loss": 0.00016619529924355447, "loss_breakdown/pointer_loss": 2.077249526977539, "step": 1850 }, { "epoch": 0.18865278963939275, "loss_breakdown/lm_loss": 5.9391517424955964e-05, "loss_breakdown/pointer_loss": 1.0462002754211426, "step": 1850 }, { "epoch": 0.18865278963939275, "loss_breakdown/lm_loss": 0.00010125919652637094, "loss_breakdown/pointer_loss": 0.5087765455245972, "step": 1850 }, { "epoch": 0.18865278963939275, "loss_breakdown/lm_loss": 4.14290334447287e-05, "loss_breakdown/pointer_loss": 0.6098998188972473, "step": 1850 }, { "epoch": 0.18865278963939275, "loss_breakdown/lm_loss": 3.607747203204781e-05, "loss_breakdown/pointer_loss": 0.6873303055763245, "step": 1850 }, { "epoch": 0.18865278963939275, "loss_breakdown/lm_loss": 4.7628956963308156e-05, "loss_breakdown/pointer_loss": 0.5908200740814209, "step": 1850 }, { "epoch": 0.18865278963939275, "loss_breakdown/lm_loss": 2.548550946812611e-05, "loss_breakdown/pointer_loss": 0.2916772663593292, "step": 1850 }, { "epoch": 0.18865278963939275, "loss_breakdown/lm_loss": 4.277996049495414e-05, "loss_breakdown/pointer_loss": 0.6930313110351562, "step": 1850 }, { "epoch": 0.18967253444825433, "grad_norm": 5.846731377340613, "learning_rate": 4.502549575070822e-06, "loss": 0.3518, "step": 1860 }, { "epoch": 0.18967253444825433, "loss_breakdown/lm_loss": 2.3341644919128157e-05, "loss_breakdown/pointer_loss": 0.5694283246994019, "step": 1860 }, { "epoch": 0.18967253444825433, "loss_breakdown/lm_loss": 2.539364322728943e-05, "loss_breakdown/pointer_loss": 0.33708053827285767, "step": 1860 }, { "epoch": 0.18967253444825433, "loss_breakdown/lm_loss": 3.5749326343648136e-05, "loss_breakdown/pointer_loss": 0.3008624315261841, "step": 1860 }, { "epoch": 0.18967253444825433, "loss_breakdown/lm_loss": 2.8958536859136075e-05, "loss_breakdown/pointer_loss": 1.3442531824111938, "step": 1860 }, { "epoch": 0.18967253444825433, "loss_breakdown/lm_loss": 2.2497868485515937e-05, "loss_breakdown/pointer_loss": 0.13739073276519775, "step": 1860 }, { "epoch": 0.18967253444825433, "loss_breakdown/lm_loss": 2.7810589017462917e-05, "loss_breakdown/pointer_loss": 0.37220412492752075, "step": 1860 }, { "epoch": 0.18967253444825433, "loss_breakdown/lm_loss": 4.242009890731424e-05, "loss_breakdown/pointer_loss": 1.9039475917816162, "step": 1860 }, { "epoch": 0.18967253444825433, "loss_breakdown/lm_loss": 2.2640977476839907e-05, "loss_breakdown/pointer_loss": 0.15052852034568787, "step": 1860 }, { "epoch": 0.1906922792571159, "grad_norm": 4.435606144451359, "learning_rate": 4.496883852691218e-06, "loss": 0.334, "step": 1870 }, { "epoch": 0.1906922792571159, "loss_breakdown/lm_loss": 2.9988550522830337e-05, "loss_breakdown/pointer_loss": 0.6826391220092773, "step": 1870 }, { "epoch": 0.1906922792571159, "loss_breakdown/lm_loss": 3.2022606319515035e-05, "loss_breakdown/pointer_loss": 0.32478636503219604, "step": 1870 }, { "epoch": 0.1906922792571159, "loss_breakdown/lm_loss": 3.140161061310209e-05, "loss_breakdown/pointer_loss": 0.18057256937026978, "step": 1870 }, { "epoch": 0.1906922792571159, "loss_breakdown/lm_loss": 3.211875446140766e-05, "loss_breakdown/pointer_loss": 0.6911243796348572, "step": 1870 }, { "epoch": 0.1906922792571159, "loss_breakdown/lm_loss": 2.2180336600285955e-05, "loss_breakdown/pointer_loss": 0.6385858654975891, "step": 1870 }, { "epoch": 0.1906922792571159, "loss_breakdown/lm_loss": 5.697724554920569e-05, "loss_breakdown/pointer_loss": 0.4273400902748108, "step": 1870 }, { "epoch": 0.1906922792571159, "loss_breakdown/lm_loss": 2.610763840493746e-05, "loss_breakdown/pointer_loss": 0.4228796064853668, "step": 1870 }, { "epoch": 0.1906922792571159, "loss_breakdown/lm_loss": 2.1837016902281903e-05, "loss_breakdown/pointer_loss": 0.3970898985862732, "step": 1870 }, { "epoch": 0.19171202406597748, "grad_norm": 9.212321654470573, "learning_rate": 4.491218130311616e-06, "loss": 0.3081, "step": 1880 }, { "epoch": 0.19171202406597748, "loss_breakdown/lm_loss": 2.4854143703123555e-05, "loss_breakdown/pointer_loss": 0.27358782291412354, "step": 1880 }, { "epoch": 0.19171202406597748, "loss_breakdown/lm_loss": 2.7349153242539614e-05, "loss_breakdown/pointer_loss": 0.3229275643825531, "step": 1880 }, { "epoch": 0.19171202406597748, "loss_breakdown/lm_loss": 2.5787792765186168e-05, "loss_breakdown/pointer_loss": 2.585247755050659, "step": 1880 }, { "epoch": 0.19171202406597748, "loss_breakdown/lm_loss": 2.8703994757961482e-05, "loss_breakdown/pointer_loss": 0.17752355337142944, "step": 1880 }, { "epoch": 0.19171202406597748, "loss_breakdown/lm_loss": 2.560113716754131e-05, "loss_breakdown/pointer_loss": 0.1787055879831314, "step": 1880 }, { "epoch": 0.19171202406597748, "loss_breakdown/lm_loss": 2.7133040930493735e-05, "loss_breakdown/pointer_loss": 0.2745681703090668, "step": 1880 }, { "epoch": 0.19171202406597748, "loss_breakdown/lm_loss": 3.5999095416627824e-05, "loss_breakdown/pointer_loss": 0.8792011141777039, "step": 1880 }, { "epoch": 0.19171202406597748, "loss_breakdown/lm_loss": 2.1945514163235202e-05, "loss_breakdown/pointer_loss": 0.12260079383850098, "step": 1880 }, { "epoch": 0.19273176887483906, "grad_norm": 19.270187943346134, "learning_rate": 4.485552407932012e-06, "loss": 0.3599, "step": 1890 }, { "epoch": 0.19273176887483906, "loss_breakdown/lm_loss": 2.380107980570756e-05, "loss_breakdown/pointer_loss": 0.3564499616622925, "step": 1890 }, { "epoch": 0.19273176887483906, "loss_breakdown/lm_loss": 1.9876119040418416e-05, "loss_breakdown/pointer_loss": 0.21807323396205902, "step": 1890 }, { "epoch": 0.19273176887483906, "loss_breakdown/lm_loss": 2.9294356863829307e-05, "loss_breakdown/pointer_loss": 0.32154667377471924, "step": 1890 }, { "epoch": 0.19273176887483906, "loss_breakdown/lm_loss": 2.3846292606322095e-05, "loss_breakdown/pointer_loss": 0.2596513032913208, "step": 1890 }, { "epoch": 0.19273176887483906, "loss_breakdown/lm_loss": 2.3134678485803306e-05, "loss_breakdown/pointer_loss": 0.4263496398925781, "step": 1890 }, { "epoch": 0.19273176887483906, "loss_breakdown/lm_loss": 2.1418114556581713e-05, "loss_breakdown/pointer_loss": 1.1165932416915894, "step": 1890 }, { "epoch": 0.19273176887483906, "loss_breakdown/lm_loss": 2.566542571003083e-05, "loss_breakdown/pointer_loss": 1.7549070119857788, "step": 1890 }, { "epoch": 0.19273176887483906, "loss_breakdown/lm_loss": 2.5111152353929356e-05, "loss_breakdown/pointer_loss": 0.1786249577999115, "step": 1890 }, { "epoch": 0.19375151368370067, "grad_norm": 10.664307483712102, "learning_rate": 4.479886685552408e-06, "loss": 0.303, "step": 1900 }, { "epoch": 0.19375151368370067, "loss_breakdown/lm_loss": 0.00013093711459077895, "loss_breakdown/pointer_loss": 1.6155798435211182, "step": 1900 }, { "epoch": 0.19375151368370067, "loss_breakdown/lm_loss": 8.530525519745424e-05, "loss_breakdown/pointer_loss": 2.0238544940948486, "step": 1900 }, { "epoch": 0.19375151368370067, "loss_breakdown/lm_loss": 4.789828017237596e-05, "loss_breakdown/pointer_loss": 0.9035738110542297, "step": 1900 }, { "epoch": 0.19375151368370067, "loss_breakdown/lm_loss": 3.796177770709619e-05, "loss_breakdown/pointer_loss": 0.43041566014289856, "step": 1900 }, { "epoch": 0.19375151368370067, "loss_breakdown/lm_loss": 4.618274397216737e-05, "loss_breakdown/pointer_loss": 1.010545015335083, "step": 1900 }, { "epoch": 0.19375151368370067, "loss_breakdown/lm_loss": 3.076713255723007e-05, "loss_breakdown/pointer_loss": 1.3386648893356323, "step": 1900 }, { "epoch": 0.19375151368370067, "loss_breakdown/lm_loss": 4.208641985314898e-05, "loss_breakdown/pointer_loss": 1.4011273384094238, "step": 1900 }, { "epoch": 0.19375151368370067, "loss_breakdown/lm_loss": 4.3506177462404594e-05, "loss_breakdown/pointer_loss": 0.7284778356552124, "step": 1900 }, { "epoch": 0.19477125849256224, "grad_norm": 3.9639047703856605, "learning_rate": 4.474220963172805e-06, "loss": 0.315, "step": 1910 }, { "epoch": 0.19477125849256224, "loss_breakdown/lm_loss": 5.342260919860564e-05, "loss_breakdown/pointer_loss": 0.3668404519557953, "step": 1910 }, { "epoch": 0.19477125849256224, "loss_breakdown/lm_loss": 2.5440789613639936e-05, "loss_breakdown/pointer_loss": 0.1315295398235321, "step": 1910 }, { "epoch": 0.19477125849256224, "loss_breakdown/lm_loss": 2.6373852961114608e-05, "loss_breakdown/pointer_loss": 0.7915478944778442, "step": 1910 }, { "epoch": 0.19477125849256224, "loss_breakdown/lm_loss": 2.2755999452783726e-05, "loss_breakdown/pointer_loss": 0.41343623399734497, "step": 1910 }, { "epoch": 0.19477125849256224, "loss_breakdown/lm_loss": 2.5916418962879106e-05, "loss_breakdown/pointer_loss": 0.07285991311073303, "step": 1910 }, { "epoch": 0.19477125849256224, "loss_breakdown/lm_loss": 3.7236935895634815e-05, "loss_breakdown/pointer_loss": 0.26051074266433716, "step": 1910 }, { "epoch": 0.19477125849256224, "loss_breakdown/lm_loss": 3.0321998565341346e-05, "loss_breakdown/pointer_loss": 0.31136542558670044, "step": 1910 }, { "epoch": 0.19477125849256224, "loss_breakdown/lm_loss": 2.4651411877130158e-05, "loss_breakdown/pointer_loss": 0.13118857145309448, "step": 1910 }, { "epoch": 0.19579100330142382, "grad_norm": 4.665978887463505, "learning_rate": 4.468555240793202e-06, "loss": 0.3272, "step": 1920 }, { "epoch": 0.19579100330142382, "loss_breakdown/lm_loss": 2.389403380220756e-05, "loss_breakdown/pointer_loss": 0.6076685786247253, "step": 1920 }, { "epoch": 0.19579100330142382, "loss_breakdown/lm_loss": 5.5046661145752296e-05, "loss_breakdown/pointer_loss": 0.3745371699333191, "step": 1920 }, { "epoch": 0.19579100330142382, "loss_breakdown/lm_loss": 2.559291897341609e-05, "loss_breakdown/pointer_loss": 0.3798195719718933, "step": 1920 }, { "epoch": 0.19579100330142382, "loss_breakdown/lm_loss": 2.6122546842088923e-05, "loss_breakdown/pointer_loss": 0.7221803665161133, "step": 1920 }, { "epoch": 0.19579100330142382, "loss_breakdown/lm_loss": 2.221643262600992e-05, "loss_breakdown/pointer_loss": 0.35275065898895264, "step": 1920 }, { "epoch": 0.19579100330142382, "loss_breakdown/lm_loss": 2.482280797266867e-05, "loss_breakdown/pointer_loss": 1.3385179042816162, "step": 1920 }, { "epoch": 0.19579100330142382, "loss_breakdown/lm_loss": 3.256123454775661e-05, "loss_breakdown/pointer_loss": 0.6508550643920898, "step": 1920 }, { "epoch": 0.19579100330142382, "loss_breakdown/lm_loss": 2.920092265412677e-05, "loss_breakdown/pointer_loss": 0.7232987284660339, "step": 1920 }, { "epoch": 0.1968107481102854, "grad_norm": 8.139719017077676, "learning_rate": 4.462889518413598e-06, "loss": 0.3196, "step": 1930 }, { "epoch": 0.1968107481102854, "loss_breakdown/lm_loss": 2.3443337340722792e-05, "loss_breakdown/pointer_loss": 1.5082303285598755, "step": 1930 }, { "epoch": 0.1968107481102854, "loss_breakdown/lm_loss": 3.344005381222814e-05, "loss_breakdown/pointer_loss": 0.15189290046691895, "step": 1930 }, { "epoch": 0.1968107481102854, "loss_breakdown/lm_loss": 3.958243542001583e-05, "loss_breakdown/pointer_loss": 0.4953748881816864, "step": 1930 }, { "epoch": 0.1968107481102854, "loss_breakdown/lm_loss": 2.997977026097942e-05, "loss_breakdown/pointer_loss": 0.4551606774330139, "step": 1930 }, { "epoch": 0.1968107481102854, "loss_breakdown/lm_loss": 2.9617769541800953e-05, "loss_breakdown/pointer_loss": 0.5478184223175049, "step": 1930 }, { "epoch": 0.1968107481102854, "loss_breakdown/lm_loss": 5.866797437192872e-05, "loss_breakdown/pointer_loss": 0.17497044801712036, "step": 1930 }, { "epoch": 0.1968107481102854, "loss_breakdown/lm_loss": 5.606693230220117e-05, "loss_breakdown/pointer_loss": 0.3567636013031006, "step": 1930 }, { "epoch": 0.1968107481102854, "loss_breakdown/lm_loss": 3.611004649428651e-05, "loss_breakdown/pointer_loss": 0.3811776638031006, "step": 1930 }, { "epoch": 0.19783049291914698, "grad_norm": 6.498904697673981, "learning_rate": 4.457223796033995e-06, "loss": 0.3315, "step": 1940 }, { "epoch": 0.19783049291914698, "loss_breakdown/lm_loss": 1.9985822291346267e-05, "loss_breakdown/pointer_loss": 0.2196630835533142, "step": 1940 }, { "epoch": 0.19783049291914698, "loss_breakdown/lm_loss": 2.273353675263934e-05, "loss_breakdown/pointer_loss": 0.22992195188999176, "step": 1940 }, { "epoch": 0.19783049291914698, "loss_breakdown/lm_loss": 2.1760946765425615e-05, "loss_breakdown/pointer_loss": 0.7900298237800598, "step": 1940 }, { "epoch": 0.19783049291914698, "loss_breakdown/lm_loss": 1.871915446827188e-05, "loss_breakdown/pointer_loss": 0.3323932886123657, "step": 1940 }, { "epoch": 0.19783049291914698, "loss_breakdown/lm_loss": 2.6646690457710065e-05, "loss_breakdown/pointer_loss": 1.1164461374282837, "step": 1940 }, { "epoch": 0.19783049291914698, "loss_breakdown/lm_loss": 2.3576612875331193e-05, "loss_breakdown/pointer_loss": 0.9910014867782593, "step": 1940 }, { "epoch": 0.19783049291914698, "loss_breakdown/lm_loss": 2.6138182875001803e-05, "loss_breakdown/pointer_loss": 0.25268834829330444, "step": 1940 }, { "epoch": 0.19783049291914698, "loss_breakdown/lm_loss": 2.6044752303278074e-05, "loss_breakdown/pointer_loss": 0.2178066372871399, "step": 1940 }, { "epoch": 0.19885023772800856, "grad_norm": 27.799881941219464, "learning_rate": 4.451558073654391e-06, "loss": 0.3126, "step": 1950 }, { "epoch": 0.19885023772800856, "loss_breakdown/lm_loss": 0.0001329496590187773, "loss_breakdown/pointer_loss": 0.8439360857009888, "step": 1950 }, { "epoch": 0.19885023772800856, "loss_breakdown/lm_loss": 4.301409353502095e-05, "loss_breakdown/pointer_loss": 0.915962815284729, "step": 1950 }, { "epoch": 0.19885023772800856, "loss_breakdown/lm_loss": 3.222445957362652e-05, "loss_breakdown/pointer_loss": 1.0668715238571167, "step": 1950 }, { "epoch": 0.19885023772800856, "loss_breakdown/lm_loss": 4.547187563730404e-05, "loss_breakdown/pointer_loss": 1.1300020217895508, "step": 1950 }, { "epoch": 0.19885023772800856, "loss_breakdown/lm_loss": 2.354182106500957e-05, "loss_breakdown/pointer_loss": 0.5358169078826904, "step": 1950 }, { "epoch": 0.19885023772800856, "loss_breakdown/lm_loss": 3.4311629860894755e-05, "loss_breakdown/pointer_loss": 0.6242634057998657, "step": 1950 }, { "epoch": 0.19885023772800856, "loss_breakdown/lm_loss": 3.1174658943200484e-05, "loss_breakdown/pointer_loss": 0.22271469235420227, "step": 1950 }, { "epoch": 0.19885023772800856, "loss_breakdown/lm_loss": 3.2918174838414416e-05, "loss_breakdown/pointer_loss": 0.5370392799377441, "step": 1950 }, { "epoch": 0.19986998253687016, "grad_norm": 4.050112511346861, "learning_rate": 4.4458923512747885e-06, "loss": 0.3022, "step": 1960 }, { "epoch": 0.19986998253687016, "loss_breakdown/lm_loss": 2.5904615540639497e-05, "loss_breakdown/pointer_loss": 0.6428866982460022, "step": 1960 }, { "epoch": 0.19986998253687016, "loss_breakdown/lm_loss": 2.1893620214541443e-05, "loss_breakdown/pointer_loss": 0.23152172565460205, "step": 1960 }, { "epoch": 0.19986998253687016, "loss_breakdown/lm_loss": 2.6141282432945445e-05, "loss_breakdown/pointer_loss": 0.5212379097938538, "step": 1960 }, { "epoch": 0.19986998253687016, "loss_breakdown/lm_loss": 2.833047983585857e-05, "loss_breakdown/pointer_loss": 0.4034000039100647, "step": 1960 }, { "epoch": 0.19986998253687016, "loss_breakdown/lm_loss": 3.483043474261649e-05, "loss_breakdown/pointer_loss": 0.25193142890930176, "step": 1960 }, { "epoch": 0.19986998253687016, "loss_breakdown/lm_loss": 6.36235999991186e-05, "loss_breakdown/pointer_loss": 0.4799710512161255, "step": 1960 }, { "epoch": 0.19986998253687016, "loss_breakdown/lm_loss": 2.5915011065080762e-05, "loss_breakdown/pointer_loss": 0.14226721227169037, "step": 1960 }, { "epoch": 0.19986998253687016, "loss_breakdown/lm_loss": 5.1985520258313045e-05, "loss_breakdown/pointer_loss": 0.49804264307022095, "step": 1960 }, { "epoch": 0.20088972734573174, "grad_norm": 4.311999057447221, "learning_rate": 4.440226628895184e-06, "loss": 0.3272, "step": 1970 }, { "epoch": 0.20088972734573174, "loss_breakdown/lm_loss": 2.902571577578783e-05, "loss_breakdown/pointer_loss": 0.6362966895103455, "step": 1970 }, { "epoch": 0.20088972734573174, "loss_breakdown/lm_loss": 2.959818266390357e-05, "loss_breakdown/pointer_loss": 0.577858567237854, "step": 1970 }, { "epoch": 0.20088972734573174, "loss_breakdown/lm_loss": 2.873308039852418e-05, "loss_breakdown/pointer_loss": 0.4545801281929016, "step": 1970 }, { "epoch": 0.20088972734573174, "loss_breakdown/lm_loss": 3.5313940315973014e-05, "loss_breakdown/pointer_loss": 0.3857901096343994, "step": 1970 }, { "epoch": 0.20088972734573174, "loss_breakdown/lm_loss": 2.298568324476946e-05, "loss_breakdown/pointer_loss": 0.5378491878509521, "step": 1970 }, { "epoch": 0.20088972734573174, "loss_breakdown/lm_loss": 2.1292606106726453e-05, "loss_breakdown/pointer_loss": 0.20469731092453003, "step": 1970 }, { "epoch": 0.20088972734573174, "loss_breakdown/lm_loss": 1.808674460335169e-05, "loss_breakdown/pointer_loss": 0.16141095757484436, "step": 1970 }, { "epoch": 0.20088972734573174, "loss_breakdown/lm_loss": 2.3482889446313493e-05, "loss_breakdown/pointer_loss": 0.7754586935043335, "step": 1970 }, { "epoch": 0.20190947215459332, "grad_norm": 7.769890393787815, "learning_rate": 4.434560906515581e-06, "loss": 0.2985, "step": 1980 }, { "epoch": 0.20190947215459332, "loss_breakdown/lm_loss": 2.2589380023418926e-05, "loss_breakdown/pointer_loss": 0.2545534670352936, "step": 1980 }, { "epoch": 0.20190947215459332, "loss_breakdown/lm_loss": 2.7679006961989217e-05, "loss_breakdown/pointer_loss": 1.527031421661377, "step": 1980 }, { "epoch": 0.20190947215459332, "loss_breakdown/lm_loss": 4.092347080586478e-05, "loss_breakdown/pointer_loss": 0.3286951780319214, "step": 1980 }, { "epoch": 0.20190947215459332, "loss_breakdown/lm_loss": 3.655007094494067e-05, "loss_breakdown/pointer_loss": 0.5579057931900024, "step": 1980 }, { "epoch": 0.20190947215459332, "loss_breakdown/lm_loss": 4.09286622016225e-05, "loss_breakdown/pointer_loss": 1.5678298473358154, "step": 1980 }, { "epoch": 0.20190947215459332, "loss_breakdown/lm_loss": 2.9037326385150664e-05, "loss_breakdown/pointer_loss": 0.30691248178482056, "step": 1980 }, { "epoch": 0.20190947215459332, "loss_breakdown/lm_loss": 2.0682155081885867e-05, "loss_breakdown/pointer_loss": 0.13701596856117249, "step": 1980 }, { "epoch": 0.20190947215459332, "loss_breakdown/lm_loss": 2.3864784452598542e-05, "loss_breakdown/pointer_loss": 2.441830635070801, "step": 1980 }, { "epoch": 0.2029292169634549, "grad_norm": 7.408479487028429, "learning_rate": 4.428895184135978e-06, "loss": 0.3217, "step": 1990 }, { "epoch": 0.2029292169634549, "loss_breakdown/lm_loss": 2.142581797670573e-05, "loss_breakdown/pointer_loss": 0.14979621767997742, "step": 1990 }, { "epoch": 0.2029292169634549, "loss_breakdown/lm_loss": 2.1216597815509886e-05, "loss_breakdown/pointer_loss": 0.7096388339996338, "step": 1990 }, { "epoch": 0.2029292169634549, "loss_breakdown/lm_loss": 2.057841447822284e-05, "loss_breakdown/pointer_loss": 0.26556891202926636, "step": 1990 }, { "epoch": 0.2029292169634549, "loss_breakdown/lm_loss": 1.9955503375967965e-05, "loss_breakdown/pointer_loss": 0.5753726959228516, "step": 1990 }, { "epoch": 0.2029292169634549, "loss_breakdown/lm_loss": 2.157598100893665e-05, "loss_breakdown/pointer_loss": 0.302410364151001, "step": 1990 }, { "epoch": 0.2029292169634549, "loss_breakdown/lm_loss": 2.3473934561479837e-05, "loss_breakdown/pointer_loss": 0.23266303539276123, "step": 1990 }, { "epoch": 0.2029292169634549, "loss_breakdown/lm_loss": 2.2078729671193287e-05, "loss_breakdown/pointer_loss": 0.23018258810043335, "step": 1990 }, { "epoch": 0.2029292169634549, "loss_breakdown/lm_loss": 2.0358640540507622e-05, "loss_breakdown/pointer_loss": 0.24009646475315094, "step": 1990 }, { "epoch": 0.20394896177231647, "grad_norm": 19.14493230213136, "learning_rate": 4.4232294617563745e-06, "loss": 0.3158, "step": 2000 }, { "epoch": 0.20394896177231647, "loss_breakdown/lm_loss": 0.00012646608229260892, "loss_breakdown/pointer_loss": 2.6792025566101074, "step": 2000 }, { "epoch": 0.20394896177231647, "loss_breakdown/lm_loss": 3.877237031701952e-05, "loss_breakdown/pointer_loss": 1.2707366943359375, "step": 2000 }, { "epoch": 0.20394896177231647, "loss_breakdown/lm_loss": 2.6372892534709536e-05, "loss_breakdown/pointer_loss": 0.7908077239990234, "step": 2000 }, { "epoch": 0.20394896177231647, "loss_breakdown/lm_loss": 3.137846942991018e-05, "loss_breakdown/pointer_loss": 2.276552677154541, "step": 2000 }, { "epoch": 0.20394896177231647, "loss_breakdown/lm_loss": 3.659935828181915e-05, "loss_breakdown/pointer_loss": 0.8556295037269592, "step": 2000 }, { "epoch": 0.20394896177231647, "loss_breakdown/lm_loss": 2.673850758583285e-05, "loss_breakdown/pointer_loss": 0.7518693208694458, "step": 2000 }, { "epoch": 0.20394896177231647, "loss_breakdown/lm_loss": 2.2244648789637722e-05, "loss_breakdown/pointer_loss": 0.2644430994987488, "step": 2000 }, { "epoch": 0.20394896177231647, "loss_breakdown/lm_loss": 1.8835016817320138e-05, "loss_breakdown/pointer_loss": 0.41416096687316895, "step": 2000 }, { "epoch": 0.20496870658117805, "grad_norm": 14.338985559576248, "learning_rate": 4.417563739376771e-06, "loss": 0.3159, "step": 2010 }, { "epoch": 0.20496870658117805, "loss_breakdown/lm_loss": 1.9387895008549094e-05, "loss_breakdown/pointer_loss": 0.2627314329147339, "step": 2010 }, { "epoch": 0.20496870658117805, "loss_breakdown/lm_loss": 2.1103303879499435e-05, "loss_breakdown/pointer_loss": 0.5172909498214722, "step": 2010 }, { "epoch": 0.20496870658117805, "loss_breakdown/lm_loss": 1.7074256902560592e-05, "loss_breakdown/pointer_loss": 0.2728846073150635, "step": 2010 }, { "epoch": 0.20496870658117805, "loss_breakdown/lm_loss": 2.0471452444326133e-05, "loss_breakdown/pointer_loss": 0.8931363821029663, "step": 2010 }, { "epoch": 0.20496870658117805, "loss_breakdown/lm_loss": 1.9470231563900597e-05, "loss_breakdown/pointer_loss": 0.36515992879867554, "step": 2010 }, { "epoch": 0.20496870658117805, "loss_breakdown/lm_loss": 2.2011501641827635e-05, "loss_breakdown/pointer_loss": 0.3537721335887909, "step": 2010 }, { "epoch": 0.20496870658117805, "loss_breakdown/lm_loss": 2.0288742234697565e-05, "loss_breakdown/pointer_loss": 0.16669875383377075, "step": 2010 }, { "epoch": 0.20496870658117805, "loss_breakdown/lm_loss": 1.84609762072796e-05, "loss_breakdown/pointer_loss": 0.17497074604034424, "step": 2010 }, { "epoch": 0.20598845139003966, "grad_norm": 3.408212048811635, "learning_rate": 4.4118980169971675e-06, "loss": 0.3468, "step": 2020 }, { "epoch": 0.20598845139003966, "loss_breakdown/lm_loss": 1.8753060430753976e-05, "loss_breakdown/pointer_loss": 0.5315536260604858, "step": 2020 }, { "epoch": 0.20598845139003966, "loss_breakdown/lm_loss": 2.1833955543115735e-05, "loss_breakdown/pointer_loss": 0.24609941244125366, "step": 2020 }, { "epoch": 0.20598845139003966, "loss_breakdown/lm_loss": 2.223275077994913e-05, "loss_breakdown/pointer_loss": 0.5013658404350281, "step": 2020 }, { "epoch": 0.20598845139003966, "loss_breakdown/lm_loss": 2.651598879310768e-05, "loss_breakdown/pointer_loss": 0.3847087025642395, "step": 2020 }, { "epoch": 0.20598845139003966, "loss_breakdown/lm_loss": 2.0418163330759853e-05, "loss_breakdown/pointer_loss": 0.4853222370147705, "step": 2020 }, { "epoch": 0.20598845139003966, "loss_breakdown/lm_loss": 2.764936834864784e-05, "loss_breakdown/pointer_loss": 0.42848286032676697, "step": 2020 }, { "epoch": 0.20598845139003966, "loss_breakdown/lm_loss": 1.730971416691318e-05, "loss_breakdown/pointer_loss": 0.5285370349884033, "step": 2020 }, { "epoch": 0.20598845139003966, "loss_breakdown/lm_loss": 1.6593452528468333e-05, "loss_breakdown/pointer_loss": 0.17037703096866608, "step": 2020 }, { "epoch": 0.20700819619890123, "grad_norm": 13.393719478045908, "learning_rate": 4.406232294617564e-06, "loss": 0.2987, "step": 2030 }, { "epoch": 0.20700819619890123, "loss_breakdown/lm_loss": 1.8222586732008494e-05, "loss_breakdown/pointer_loss": 0.09332889318466187, "step": 2030 }, { "epoch": 0.20700819619890123, "loss_breakdown/lm_loss": 3.1250885513145477e-05, "loss_breakdown/pointer_loss": 0.36517998576164246, "step": 2030 }, { "epoch": 0.20700819619890123, "loss_breakdown/lm_loss": 2.2954927771934308e-05, "loss_breakdown/pointer_loss": 2.6546363830566406, "step": 2030 }, { "epoch": 0.20700819619890123, "loss_breakdown/lm_loss": 2.4556173229939304e-05, "loss_breakdown/pointer_loss": 0.44046199321746826, "step": 2030 }, { "epoch": 0.20700819619890123, "loss_breakdown/lm_loss": 2.0094099454581738e-05, "loss_breakdown/pointer_loss": 0.511561393737793, "step": 2030 }, { "epoch": 0.20700819619890123, "loss_breakdown/lm_loss": 2.0340426999609917e-05, "loss_breakdown/pointer_loss": 0.1157761737704277, "step": 2030 }, { "epoch": 0.20700819619890123, "loss_breakdown/lm_loss": 3.633486630860716e-05, "loss_breakdown/pointer_loss": 0.25754666328430176, "step": 2030 }, { "epoch": 0.20700819619890123, "loss_breakdown/lm_loss": 2.632016912684776e-05, "loss_breakdown/pointer_loss": 0.6656272411346436, "step": 2030 }, { "epoch": 0.2080279410077628, "grad_norm": 5.22326244785412, "learning_rate": 4.4005665722379605e-06, "loss": 0.3493, "step": 2040 }, { "epoch": 0.2080279410077628, "loss_breakdown/lm_loss": 1.9772078303503804e-05, "loss_breakdown/pointer_loss": 0.5215128064155579, "step": 2040 }, { "epoch": 0.2080279410077628, "loss_breakdown/lm_loss": 2.5826793716987595e-05, "loss_breakdown/pointer_loss": 0.21160130202770233, "step": 2040 }, { "epoch": 0.2080279410077628, "loss_breakdown/lm_loss": 2.037876402027905e-05, "loss_breakdown/pointer_loss": 0.4505804777145386, "step": 2040 }, { "epoch": 0.2080279410077628, "loss_breakdown/lm_loss": 3.092763290624134e-05, "loss_breakdown/pointer_loss": 0.6236115097999573, "step": 2040 }, { "epoch": 0.2080279410077628, "loss_breakdown/lm_loss": 2.018717168539297e-05, "loss_breakdown/pointer_loss": 0.1447642743587494, "step": 2040 }, { "epoch": 0.2080279410077628, "loss_breakdown/lm_loss": 2.574398422439117e-05, "loss_breakdown/pointer_loss": 0.30787065625190735, "step": 2040 }, { "epoch": 0.2080279410077628, "loss_breakdown/lm_loss": 2.371667869738303e-05, "loss_breakdown/pointer_loss": 0.1573234498500824, "step": 2040 }, { "epoch": 0.2080279410077628, "loss_breakdown/lm_loss": 2.6400588467367925e-05, "loss_breakdown/pointer_loss": 0.19611653685569763, "step": 2040 }, { "epoch": 0.2090476858166244, "grad_norm": 7.97619941345998, "learning_rate": 4.394900849858357e-06, "loss": 0.3081, "step": 2050 }, { "epoch": 0.2090476858166244, "loss_breakdown/lm_loss": 0.00014695434947498143, "loss_breakdown/pointer_loss": 2.0533721446990967, "step": 2050 }, { "epoch": 0.2090476858166244, "loss_breakdown/lm_loss": 4.6259359805844724e-05, "loss_breakdown/pointer_loss": 0.6386065483093262, "step": 2050 }, { "epoch": 0.2090476858166244, "loss_breakdown/lm_loss": 4.5547134504886344e-05, "loss_breakdown/pointer_loss": 0.9787840843200684, "step": 2050 }, { "epoch": 0.2090476858166244, "loss_breakdown/lm_loss": 3.934278720407747e-05, "loss_breakdown/pointer_loss": 0.714788019657135, "step": 2050 }, { "epoch": 0.2090476858166244, "loss_breakdown/lm_loss": 2.846861389116384e-05, "loss_breakdown/pointer_loss": 0.7363721132278442, "step": 2050 }, { "epoch": 0.2090476858166244, "loss_breakdown/lm_loss": 2.8069174732081592e-05, "loss_breakdown/pointer_loss": 0.52730393409729, "step": 2050 }, { "epoch": 0.2090476858166244, "loss_breakdown/lm_loss": 2.3701166355749592e-05, "loss_breakdown/pointer_loss": 0.45683228969573975, "step": 2050 }, { "epoch": 0.2090476858166244, "loss_breakdown/lm_loss": 2.059341386484448e-05, "loss_breakdown/pointer_loss": 0.2859417498111725, "step": 2050 }, { "epoch": 0.21006743062548597, "grad_norm": 11.883872788938554, "learning_rate": 4.3892351274787535e-06, "loss": 0.307, "step": 2060 }, { "epoch": 0.21006743062548597, "loss_breakdown/lm_loss": 1.7777574612409808e-05, "loss_breakdown/pointer_loss": 0.4621610641479492, "step": 2060 }, { "epoch": 0.21006743062548597, "loss_breakdown/lm_loss": 2.121272154909093e-05, "loss_breakdown/pointer_loss": 0.3800821900367737, "step": 2060 }, { "epoch": 0.21006743062548597, "loss_breakdown/lm_loss": 2.3395847165375017e-05, "loss_breakdown/pointer_loss": 0.26306360960006714, "step": 2060 }, { "epoch": 0.21006743062548597, "loss_breakdown/lm_loss": 2.24542964133434e-05, "loss_breakdown/pointer_loss": 0.10654652118682861, "step": 2060 }, { "epoch": 0.21006743062548597, "loss_breakdown/lm_loss": 2.4986897187773138e-05, "loss_breakdown/pointer_loss": 0.18400344252586365, "step": 2060 }, { "epoch": 0.21006743062548597, "loss_breakdown/lm_loss": 1.8111320969182998e-05, "loss_breakdown/pointer_loss": 0.19948233664035797, "step": 2060 }, { "epoch": 0.21006743062548597, "loss_breakdown/lm_loss": 2.9467042622854933e-05, "loss_breakdown/pointer_loss": 1.2992658615112305, "step": 2060 }, { "epoch": 0.21006743062548597, "loss_breakdown/lm_loss": 2.255356957903132e-05, "loss_breakdown/pointer_loss": 0.24765858054161072, "step": 2060 }, { "epoch": 0.21108717543434755, "grad_norm": 2.2198103530909536, "learning_rate": 4.383569405099151e-06, "loss": 0.3128, "step": 2070 }, { "epoch": 0.21108717543434755, "loss_breakdown/lm_loss": 2.1967980501358397e-05, "loss_breakdown/pointer_loss": 0.8480300903320312, "step": 2070 }, { "epoch": 0.21108717543434755, "loss_breakdown/lm_loss": 2.7020054403692484e-05, "loss_breakdown/pointer_loss": 0.6369835734367371, "step": 2070 }, { "epoch": 0.21108717543434755, "loss_breakdown/lm_loss": 2.0454617697396316e-05, "loss_breakdown/pointer_loss": 0.24777013063430786, "step": 2070 }, { "epoch": 0.21108717543434755, "loss_breakdown/lm_loss": 2.18887762457598e-05, "loss_breakdown/pointer_loss": 0.1975151002407074, "step": 2070 }, { "epoch": 0.21108717543434755, "loss_breakdown/lm_loss": 2.242835398647003e-05, "loss_breakdown/pointer_loss": 0.39730894565582275, "step": 2070 }, { "epoch": 0.21108717543434755, "loss_breakdown/lm_loss": 2.278012289025355e-05, "loss_breakdown/pointer_loss": 0.4597993791103363, "step": 2070 }, { "epoch": 0.21108717543434755, "loss_breakdown/lm_loss": 2.20071742660366e-05, "loss_breakdown/pointer_loss": 0.34592634439468384, "step": 2070 }, { "epoch": 0.21108717543434755, "loss_breakdown/lm_loss": 2.7780286472989246e-05, "loss_breakdown/pointer_loss": 0.5464175939559937, "step": 2070 }, { "epoch": 0.21210692024320912, "grad_norm": 3.96956700899625, "learning_rate": 4.377903682719547e-06, "loss": 0.2948, "step": 2080 }, { "epoch": 0.21210692024320912, "loss_breakdown/lm_loss": 2.337996738788206e-05, "loss_breakdown/pointer_loss": 0.4589161276817322, "step": 2080 }, { "epoch": 0.21210692024320912, "loss_breakdown/lm_loss": 2.7857875465997495e-05, "loss_breakdown/pointer_loss": 0.7437282800674438, "step": 2080 }, { "epoch": 0.21210692024320912, "loss_breakdown/lm_loss": 1.788879671948962e-05, "loss_breakdown/pointer_loss": 0.13073326647281647, "step": 2080 }, { "epoch": 0.21210692024320912, "loss_breakdown/lm_loss": 2.937582576123532e-05, "loss_breakdown/pointer_loss": 0.3000373840332031, "step": 2080 }, { "epoch": 0.21210692024320912, "loss_breakdown/lm_loss": 2.128214509866666e-05, "loss_breakdown/pointer_loss": 0.6871129274368286, "step": 2080 }, { "epoch": 0.21210692024320912, "loss_breakdown/lm_loss": 2.081314960378222e-05, "loss_breakdown/pointer_loss": 0.03694961965084076, "step": 2080 }, { "epoch": 0.21210692024320912, "loss_breakdown/lm_loss": 3.746199217857793e-05, "loss_breakdown/pointer_loss": 2.869957447052002, "step": 2080 }, { "epoch": 0.21210692024320912, "loss_breakdown/lm_loss": 2.013381708820816e-05, "loss_breakdown/pointer_loss": 0.09330391883850098, "step": 2080 }, { "epoch": 0.21312666505207073, "grad_norm": 3.550676747716303, "learning_rate": 4.372237960339944e-06, "loss": 0.3286, "step": 2090 }, { "epoch": 0.21312666505207073, "loss_breakdown/lm_loss": 3.123364513157867e-05, "loss_breakdown/pointer_loss": 0.2909654378890991, "step": 2090 }, { "epoch": 0.21312666505207073, "loss_breakdown/lm_loss": 2.3606567992828786e-05, "loss_breakdown/pointer_loss": 0.2032548487186432, "step": 2090 }, { "epoch": 0.21312666505207073, "loss_breakdown/lm_loss": 2.2240361431613564e-05, "loss_breakdown/pointer_loss": 0.532279372215271, "step": 2090 }, { "epoch": 0.21312666505207073, "loss_breakdown/lm_loss": 2.5319071937701665e-05, "loss_breakdown/pointer_loss": 0.21501842141151428, "step": 2090 }, { "epoch": 0.21312666505207073, "loss_breakdown/lm_loss": 3.4626806154847145e-05, "loss_breakdown/pointer_loss": 0.3024559020996094, "step": 2090 }, { "epoch": 0.21312666505207073, "loss_breakdown/lm_loss": 2.1385429135989398e-05, "loss_breakdown/pointer_loss": 0.5319907665252686, "step": 2090 }, { "epoch": 0.21312666505207073, "loss_breakdown/lm_loss": 2.0663903342210688e-05, "loss_breakdown/pointer_loss": 0.18062977492809296, "step": 2090 }, { "epoch": 0.21312666505207073, "loss_breakdown/lm_loss": 2.399784716544673e-05, "loss_breakdown/pointer_loss": 0.33547186851501465, "step": 2090 }, { "epoch": 0.2141464098609323, "grad_norm": 34.638319931805455, "learning_rate": 4.36657223796034e-06, "loss": 0.2765, "step": 2100 }, { "epoch": 0.2141464098609323, "loss_breakdown/lm_loss": 5.861185854882933e-05, "loss_breakdown/pointer_loss": 2.4854958057403564, "step": 2100 }, { "epoch": 0.2141464098609323, "loss_breakdown/lm_loss": 4.416934825712815e-05, "loss_breakdown/pointer_loss": 0.6055197715759277, "step": 2100 }, { "epoch": 0.2141464098609323, "loss_breakdown/lm_loss": 3.1763567676534876e-05, "loss_breakdown/pointer_loss": 1.1053961515426636, "step": 2100 }, { "epoch": 0.2141464098609323, "loss_breakdown/lm_loss": 2.9352471756283194e-05, "loss_breakdown/pointer_loss": 0.9531891345977783, "step": 2100 }, { "epoch": 0.2141464098609323, "loss_breakdown/lm_loss": 3.5164932342013344e-05, "loss_breakdown/pointer_loss": 1.5915443897247314, "step": 2100 }, { "epoch": 0.2141464098609323, "loss_breakdown/lm_loss": 2.4229333575931378e-05, "loss_breakdown/pointer_loss": 0.8158702850341797, "step": 2100 }, { "epoch": 0.2141464098609323, "loss_breakdown/lm_loss": 2.0671654056059197e-05, "loss_breakdown/pointer_loss": 0.4335600733757019, "step": 2100 }, { "epoch": 0.2141464098609323, "loss_breakdown/lm_loss": 7.258303230628371e-05, "loss_breakdown/pointer_loss": 0.8718538880348206, "step": 2100 }, { "epoch": 0.21516615466979389, "grad_norm": 3.0799387171950197, "learning_rate": 4.360906515580737e-06, "loss": 0.3148, "step": 2110 }, { "epoch": 0.21516615466979389, "loss_breakdown/lm_loss": 2.6572241040412337e-05, "loss_breakdown/pointer_loss": 0.2790278196334839, "step": 2110 }, { "epoch": 0.21516615466979389, "loss_breakdown/lm_loss": 2.6869825887843035e-05, "loss_breakdown/pointer_loss": 0.3110947608947754, "step": 2110 }, { "epoch": 0.21516615466979389, "loss_breakdown/lm_loss": 2.9574583095381968e-05, "loss_breakdown/pointer_loss": 0.3733783960342407, "step": 2110 }, { "epoch": 0.21516615466979389, "loss_breakdown/lm_loss": 2.1723082682001404e-05, "loss_breakdown/pointer_loss": 0.20002341270446777, "step": 2110 }, { "epoch": 0.21516615466979389, "loss_breakdown/lm_loss": 2.705148290260695e-05, "loss_breakdown/pointer_loss": 0.2370019108057022, "step": 2110 }, { "epoch": 0.21516615466979389, "loss_breakdown/lm_loss": 2.3233003958011977e-05, "loss_breakdown/pointer_loss": 0.4121200442314148, "step": 2110 }, { "epoch": 0.21516615466979389, "loss_breakdown/lm_loss": 8.946856542024761e-05, "loss_breakdown/pointer_loss": 1.6965770721435547, "step": 2110 }, { "epoch": 0.21516615466979389, "loss_breakdown/lm_loss": 3.0829720344627276e-05, "loss_breakdown/pointer_loss": 1.8001421689987183, "step": 2110 }, { "epoch": 0.21618589947865546, "grad_norm": 1.8261352078503434, "learning_rate": 4.355240793201133e-06, "loss": 0.3134, "step": 2120 }, { "epoch": 0.21618589947865546, "loss_breakdown/lm_loss": 3.2957155781332403e-05, "loss_breakdown/pointer_loss": 0.2952430844306946, "step": 2120 }, { "epoch": 0.21618589947865546, "loss_breakdown/lm_loss": 3.3721902582328767e-05, "loss_breakdown/pointer_loss": 0.7982889413833618, "step": 2120 }, { "epoch": 0.21618589947865546, "loss_breakdown/lm_loss": 2.4340230083907954e-05, "loss_breakdown/pointer_loss": 0.268127977848053, "step": 2120 }, { "epoch": 0.21618589947865546, "loss_breakdown/lm_loss": 2.6667208658182062e-05, "loss_breakdown/pointer_loss": 0.25284725427627563, "step": 2120 }, { "epoch": 0.21618589947865546, "loss_breakdown/lm_loss": 3.112836566288024e-05, "loss_breakdown/pointer_loss": 0.40770867466926575, "step": 2120 }, { "epoch": 0.21618589947865546, "loss_breakdown/lm_loss": 3.3352727768942714e-05, "loss_breakdown/pointer_loss": 1.5656942129135132, "step": 2120 }, { "epoch": 0.21618589947865546, "loss_breakdown/lm_loss": 2.3013413738226518e-05, "loss_breakdown/pointer_loss": 0.1323339194059372, "step": 2120 }, { "epoch": 0.21618589947865546, "loss_breakdown/lm_loss": 2.165139449061826e-05, "loss_breakdown/pointer_loss": 0.4788579046726227, "step": 2120 }, { "epoch": 0.21720564428751704, "grad_norm": 4.860585461524473, "learning_rate": 4.34957507082153e-06, "loss": 0.3046, "step": 2130 }, { "epoch": 0.21720564428751704, "loss_breakdown/lm_loss": 3.26730078086257e-05, "loss_breakdown/pointer_loss": 0.7203733921051025, "step": 2130 }, { "epoch": 0.21720564428751704, "loss_breakdown/lm_loss": 2.5581060981494375e-05, "loss_breakdown/pointer_loss": 0.8730261325836182, "step": 2130 }, { "epoch": 0.21720564428751704, "loss_breakdown/lm_loss": 1.9525889001670294e-05, "loss_breakdown/pointer_loss": 0.3405880928039551, "step": 2130 }, { "epoch": 0.21720564428751704, "loss_breakdown/lm_loss": 3.0023376893950626e-05, "loss_breakdown/pointer_loss": 1.4151718616485596, "step": 2130 }, { "epoch": 0.21720564428751704, "loss_breakdown/lm_loss": 3.0071079891058616e-05, "loss_breakdown/pointer_loss": 1.5684289932250977, "step": 2130 }, { "epoch": 0.21720564428751704, "loss_breakdown/lm_loss": 2.011004471569322e-05, "loss_breakdown/pointer_loss": 0.253460168838501, "step": 2130 }, { "epoch": 0.21720564428751704, "loss_breakdown/lm_loss": 3.177573307766579e-05, "loss_breakdown/pointer_loss": 1.0297808647155762, "step": 2130 }, { "epoch": 0.21720564428751704, "loss_breakdown/lm_loss": 2.1607604139717296e-05, "loss_breakdown/pointer_loss": 0.1844683140516281, "step": 2130 }, { "epoch": 0.21822538909637862, "grad_norm": 12.120761610502006, "learning_rate": 4.343909348441926e-06, "loss": 0.3413, "step": 2140 }, { "epoch": 0.21822538909637862, "loss_breakdown/lm_loss": 1.9000774045707658e-05, "loss_breakdown/pointer_loss": 0.5841309428215027, "step": 2140 }, { "epoch": 0.21822538909637862, "loss_breakdown/lm_loss": 1.6982814486254938e-05, "loss_breakdown/pointer_loss": 0.2677898108959198, "step": 2140 }, { "epoch": 0.21822538909637862, "loss_breakdown/lm_loss": 2.409965600236319e-05, "loss_breakdown/pointer_loss": 0.668929934501648, "step": 2140 }, { "epoch": 0.21822538909637862, "loss_breakdown/lm_loss": 2.1234414816717617e-05, "loss_breakdown/pointer_loss": 0.509157657623291, "step": 2140 }, { "epoch": 0.21822538909637862, "loss_breakdown/lm_loss": 1.9212622646591626e-05, "loss_breakdown/pointer_loss": 0.28579604625701904, "step": 2140 }, { "epoch": 0.21822538909637862, "loss_breakdown/lm_loss": 2.2589914806303568e-05, "loss_breakdown/pointer_loss": 0.4374891221523285, "step": 2140 }, { "epoch": 0.21822538909637862, "loss_breakdown/lm_loss": 2.207594116043765e-05, "loss_breakdown/pointer_loss": 0.38600823283195496, "step": 2140 }, { "epoch": 0.21822538909637862, "loss_breakdown/lm_loss": 2.196401692344807e-05, "loss_breakdown/pointer_loss": 0.3989821672439575, "step": 2140 }, { "epoch": 0.21924513390524022, "grad_norm": 15.426513372633478, "learning_rate": 4.338243626062324e-06, "loss": 0.289, "step": 2150 }, { "epoch": 0.21924513390524022, "loss_breakdown/lm_loss": 0.0001226865715580061, "loss_breakdown/pointer_loss": 0.6418293714523315, "step": 2150 }, { "epoch": 0.21924513390524022, "loss_breakdown/lm_loss": 3.842413207166828e-05, "loss_breakdown/pointer_loss": 0.6691540479660034, "step": 2150 }, { "epoch": 0.21924513390524022, "loss_breakdown/lm_loss": 2.271315679536201e-05, "loss_breakdown/pointer_loss": 0.5426298975944519, "step": 2150 }, { "epoch": 0.21924513390524022, "loss_breakdown/lm_loss": 2.8699369067908265e-05, "loss_breakdown/pointer_loss": 0.54985111951828, "step": 2150 }, { "epoch": 0.21924513390524022, "loss_breakdown/lm_loss": 2.670701542228926e-05, "loss_breakdown/pointer_loss": 0.9851815104484558, "step": 2150 }, { "epoch": 0.21924513390524022, "loss_breakdown/lm_loss": 2.5956273020710796e-05, "loss_breakdown/pointer_loss": 0.9536702036857605, "step": 2150 }, { "epoch": 0.21924513390524022, "loss_breakdown/lm_loss": 2.2301408534985967e-05, "loss_breakdown/pointer_loss": 0.48425590991973877, "step": 2150 }, { "epoch": 0.21924513390524022, "loss_breakdown/lm_loss": 2.0033630789839663e-05, "loss_breakdown/pointer_loss": 0.34737855195999146, "step": 2150 }, { "epoch": 0.2202648787141018, "grad_norm": 2.7615250922356807, "learning_rate": 4.332577903682719e-06, "loss": 0.2924, "step": 2160 }, { "epoch": 0.2202648787141018, "loss_breakdown/lm_loss": 1.5712919775978662e-05, "loss_breakdown/pointer_loss": 0.23490118980407715, "step": 2160 }, { "epoch": 0.2202648787141018, "loss_breakdown/lm_loss": 3.144311995129101e-05, "loss_breakdown/pointer_loss": 0.3677211105823517, "step": 2160 }, { "epoch": 0.2202648787141018, "loss_breakdown/lm_loss": 2.6612098736222833e-05, "loss_breakdown/pointer_loss": 0.2157927006483078, "step": 2160 }, { "epoch": 0.2202648787141018, "loss_breakdown/lm_loss": 1.5425226592924446e-05, "loss_breakdown/pointer_loss": 0.06840778887271881, "step": 2160 }, { "epoch": 0.2202648787141018, "loss_breakdown/lm_loss": 1.8588030798127875e-05, "loss_breakdown/pointer_loss": 3.3079514503479004, "step": 2160 }, { "epoch": 0.2202648787141018, "loss_breakdown/lm_loss": 2.313363438588567e-05, "loss_breakdown/pointer_loss": 0.3656688630580902, "step": 2160 }, { "epoch": 0.2202648787141018, "loss_breakdown/lm_loss": 1.9470051483949646e-05, "loss_breakdown/pointer_loss": 0.1168021559715271, "step": 2160 }, { "epoch": 0.2202648787141018, "loss_breakdown/lm_loss": 2.128176492988132e-05, "loss_breakdown/pointer_loss": 0.10028555989265442, "step": 2160 }, { "epoch": 0.22128462352296338, "grad_norm": 2.4973263778532795, "learning_rate": 4.326912181303117e-06, "loss": 0.3436, "step": 2170 }, { "epoch": 0.22128462352296338, "loss_breakdown/lm_loss": 2.842474168573972e-05, "loss_breakdown/pointer_loss": 0.2777915894985199, "step": 2170 }, { "epoch": 0.22128462352296338, "loss_breakdown/lm_loss": 2.043215681624133e-05, "loss_breakdown/pointer_loss": 1.277076244354248, "step": 2170 }, { "epoch": 0.22128462352296338, "loss_breakdown/lm_loss": 4.846435331273824e-05, "loss_breakdown/pointer_loss": 0.3570486903190613, "step": 2170 }, { "epoch": 0.22128462352296338, "loss_breakdown/lm_loss": 2.1118379663676023e-05, "loss_breakdown/pointer_loss": 0.31580179929733276, "step": 2170 }, { "epoch": 0.22128462352296338, "loss_breakdown/lm_loss": 3.386416574358009e-05, "loss_breakdown/pointer_loss": 1.0270816087722778, "step": 2170 }, { "epoch": 0.22128462352296338, "loss_breakdown/lm_loss": 2.7851667255163193e-05, "loss_breakdown/pointer_loss": 0.4637279808521271, "step": 2170 }, { "epoch": 0.22128462352296338, "loss_breakdown/lm_loss": 2.8051326808054e-05, "loss_breakdown/pointer_loss": 0.6505633592605591, "step": 2170 }, { "epoch": 0.22128462352296338, "loss_breakdown/lm_loss": 1.77250913111493e-05, "loss_breakdown/pointer_loss": 0.2871910333633423, "step": 2170 }, { "epoch": 0.22230436833182496, "grad_norm": 5.55484627359426, "learning_rate": 4.321246458923513e-06, "loss": 0.2999, "step": 2180 }, { "epoch": 0.22230436833182496, "loss_breakdown/lm_loss": 2.1035733880125917e-05, "loss_breakdown/pointer_loss": 0.18467219173908234, "step": 2180 }, { "epoch": 0.22230436833182496, "loss_breakdown/lm_loss": 2.6121238988707773e-05, "loss_breakdown/pointer_loss": 0.522348165512085, "step": 2180 }, { "epoch": 0.22230436833182496, "loss_breakdown/lm_loss": 1.509944922872819e-05, "loss_breakdown/pointer_loss": 2.0424370765686035, "step": 2180 }, { "epoch": 0.22230436833182496, "loss_breakdown/lm_loss": 1.5151073057495523e-05, "loss_breakdown/pointer_loss": 0.22324490547180176, "step": 2180 }, { "epoch": 0.22230436833182496, "loss_breakdown/lm_loss": 2.4996981665026397e-05, "loss_breakdown/pointer_loss": 0.4922753572463989, "step": 2180 }, { "epoch": 0.22230436833182496, "loss_breakdown/lm_loss": 8.219136361731216e-05, "loss_breakdown/pointer_loss": 0.14919762313365936, "step": 2180 }, { "epoch": 0.22230436833182496, "loss_breakdown/lm_loss": 3.357509558554739e-05, "loss_breakdown/pointer_loss": 5.428339004516602, "step": 2180 }, { "epoch": 0.22230436833182496, "loss_breakdown/lm_loss": 1.361733848170843e-05, "loss_breakdown/pointer_loss": 0.03987929970026016, "step": 2180 }, { "epoch": 0.22332411314068654, "grad_norm": 4.307058926305367, "learning_rate": 4.31558073654391e-06, "loss": 0.3107, "step": 2190 }, { "epoch": 0.22332411314068654, "loss_breakdown/lm_loss": 1.9006360162165947e-05, "loss_breakdown/pointer_loss": 1.0151863098144531, "step": 2190 }, { "epoch": 0.22332411314068654, "loss_breakdown/lm_loss": 1.971855090232566e-05, "loss_breakdown/pointer_loss": 0.44321420788764954, "step": 2190 }, { "epoch": 0.22332411314068654, "loss_breakdown/lm_loss": 1.998107836698182e-05, "loss_breakdown/pointer_loss": 0.2657417058944702, "step": 2190 }, { "epoch": 0.22332411314068654, "loss_breakdown/lm_loss": 1.7506647054688074e-05, "loss_breakdown/pointer_loss": 0.20280277729034424, "step": 2190 }, { "epoch": 0.22332411314068654, "loss_breakdown/lm_loss": 2.0788169422303326e-05, "loss_breakdown/pointer_loss": 0.28870251774787903, "step": 2190 }, { "epoch": 0.22332411314068654, "loss_breakdown/lm_loss": 1.784424421202857e-05, "loss_breakdown/pointer_loss": 0.34737858176231384, "step": 2190 }, { "epoch": 0.22332411314068654, "loss_breakdown/lm_loss": 1.697356674412731e-05, "loss_breakdown/pointer_loss": 0.3361743688583374, "step": 2190 }, { "epoch": 0.22332411314068654, "loss_breakdown/lm_loss": 1.841184712247923e-05, "loss_breakdown/pointer_loss": 0.303851842880249, "step": 2190 }, { "epoch": 0.22434385794954811, "grad_norm": 9.587161190671024, "learning_rate": 4.309915014164306e-06, "loss": 0.3205, "step": 2200 }, { "epoch": 0.22434385794954811, "loss_breakdown/lm_loss": 8.556232205592096e-05, "loss_breakdown/pointer_loss": 1.709203839302063, "step": 2200 }, { "epoch": 0.22434385794954811, "loss_breakdown/lm_loss": 4.649745096685365e-05, "loss_breakdown/pointer_loss": 0.6270321607589722, "step": 2200 }, { "epoch": 0.22434385794954811, "loss_breakdown/lm_loss": 3.478664075373672e-05, "loss_breakdown/pointer_loss": 1.9850401878356934, "step": 2200 }, { "epoch": 0.22434385794954811, "loss_breakdown/lm_loss": 3.759083483600989e-05, "loss_breakdown/pointer_loss": 1.3297133445739746, "step": 2200 }, { "epoch": 0.22434385794954811, "loss_breakdown/lm_loss": 2.7213693101657555e-05, "loss_breakdown/pointer_loss": 1.2641528844833374, "step": 2200 }, { "epoch": 0.22434385794954811, "loss_breakdown/lm_loss": 3.0737253837287426e-05, "loss_breakdown/pointer_loss": 0.6217080354690552, "step": 2200 }, { "epoch": 0.22434385794954811, "loss_breakdown/lm_loss": 2.3321406843024306e-05, "loss_breakdown/pointer_loss": 0.670615553855896, "step": 2200 }, { "epoch": 0.22434385794954811, "loss_breakdown/lm_loss": 4.829147655982524e-05, "loss_breakdown/pointer_loss": 0.501424252986908, "step": 2200 }, { "epoch": 0.22536360275840972, "grad_norm": 3.0886902156116713, "learning_rate": 4.304249291784703e-06, "loss": 0.3268, "step": 2210 }, { "epoch": 0.22536360275840972, "loss_breakdown/lm_loss": 2.0720037355204113e-05, "loss_breakdown/pointer_loss": 0.11528456211090088, "step": 2210 }, { "epoch": 0.22536360275840972, "loss_breakdown/lm_loss": 3.563717837096192e-05, "loss_breakdown/pointer_loss": 1.3564133644104004, "step": 2210 }, { "epoch": 0.22536360275840972, "loss_breakdown/lm_loss": 2.7577050786931068e-05, "loss_breakdown/pointer_loss": 0.3447650372982025, "step": 2210 }, { "epoch": 0.22536360275840972, "loss_breakdown/lm_loss": 2.588504685263615e-05, "loss_breakdown/pointer_loss": 0.718163251876831, "step": 2210 }, { "epoch": 0.22536360275840972, "loss_breakdown/lm_loss": 2.7452573704067618e-05, "loss_breakdown/pointer_loss": 0.8239977955818176, "step": 2210 }, { "epoch": 0.22536360275840972, "loss_breakdown/lm_loss": 2.3705701096332632e-05, "loss_breakdown/pointer_loss": 0.5474264621734619, "step": 2210 }, { "epoch": 0.22536360275840972, "loss_breakdown/lm_loss": 2.2005073333275504e-05, "loss_breakdown/pointer_loss": 0.2897937595844269, "step": 2210 }, { "epoch": 0.22536360275840972, "loss_breakdown/lm_loss": 2.301846325281076e-05, "loss_breakdown/pointer_loss": 4.0750932693481445, "step": 2210 }, { "epoch": 0.2263833475672713, "grad_norm": 3.4897810242158007, "learning_rate": 4.298583569405099e-06, "loss": 0.3111, "step": 2220 }, { "epoch": 0.2263833475672713, "loss_breakdown/lm_loss": 2.015079189732205e-05, "loss_breakdown/pointer_loss": 0.1882474422454834, "step": 2220 }, { "epoch": 0.2263833475672713, "loss_breakdown/lm_loss": 2.5818189897108823e-05, "loss_breakdown/pointer_loss": 0.5454026460647583, "step": 2220 }, { "epoch": 0.2263833475672713, "loss_breakdown/lm_loss": 3.509777889121324e-05, "loss_breakdown/pointer_loss": 0.20870833098888397, "step": 2220 }, { "epoch": 0.2263833475672713, "loss_breakdown/lm_loss": 2.05120650207391e-05, "loss_breakdown/pointer_loss": 0.7202188968658447, "step": 2220 }, { "epoch": 0.2263833475672713, "loss_breakdown/lm_loss": 3.163703149766661e-05, "loss_breakdown/pointer_loss": 0.3239561915397644, "step": 2220 }, { "epoch": 0.2263833475672713, "loss_breakdown/lm_loss": 2.3127260647015646e-05, "loss_breakdown/pointer_loss": 0.6208868622779846, "step": 2220 }, { "epoch": 0.2263833475672713, "loss_breakdown/lm_loss": 2.2385218471754342e-05, "loss_breakdown/pointer_loss": 1.152167558670044, "step": 2220 }, { "epoch": 0.2263833475672713, "loss_breakdown/lm_loss": 2.0035899069625884e-05, "loss_breakdown/pointer_loss": 0.24733170866966248, "step": 2220 }, { "epoch": 0.22740309237613288, "grad_norm": 6.129706918234914, "learning_rate": 4.292917847025497e-06, "loss": 0.2793, "step": 2230 }, { "epoch": 0.22740309237613288, "loss_breakdown/lm_loss": 1.8132537661585957e-05, "loss_breakdown/pointer_loss": 0.6968210339546204, "step": 2230 }, { "epoch": 0.22740309237613288, "loss_breakdown/lm_loss": 1.6752381270634942e-05, "loss_breakdown/pointer_loss": 0.24072027206420898, "step": 2230 }, { "epoch": 0.22740309237613288, "loss_breakdown/lm_loss": 1.651796992518939e-05, "loss_breakdown/pointer_loss": 0.3457833230495453, "step": 2230 }, { "epoch": 0.22740309237613288, "loss_breakdown/lm_loss": 2.1782570911454968e-05, "loss_breakdown/pointer_loss": 2.323791265487671, "step": 2230 }, { "epoch": 0.22740309237613288, "loss_breakdown/lm_loss": 1.9100585632259026e-05, "loss_breakdown/pointer_loss": 0.03808826208114624, "step": 2230 }, { "epoch": 0.22740309237613288, "loss_breakdown/lm_loss": 2.1321749954950064e-05, "loss_breakdown/pointer_loss": 0.44482719898223877, "step": 2230 }, { "epoch": 0.22740309237613288, "loss_breakdown/lm_loss": 1.8305943740415387e-05, "loss_breakdown/pointer_loss": 0.3907075822353363, "step": 2230 }, { "epoch": 0.22740309237613288, "loss_breakdown/lm_loss": 1.5886093024164438e-05, "loss_breakdown/pointer_loss": 0.035063356161117554, "step": 2230 }, { "epoch": 0.22842283718499445, "grad_norm": 5.722662278244876, "learning_rate": 4.287252124645892e-06, "loss": 0.3135, "step": 2240 }, { "epoch": 0.22842283718499445, "loss_breakdown/lm_loss": 1.659420013311319e-05, "loss_breakdown/pointer_loss": 0.4698307514190674, "step": 2240 }, { "epoch": 0.22842283718499445, "loss_breakdown/lm_loss": 1.865367084974423e-05, "loss_breakdown/pointer_loss": 0.4223618507385254, "step": 2240 }, { "epoch": 0.22842283718499445, "loss_breakdown/lm_loss": 2.2907706807018258e-05, "loss_breakdown/pointer_loss": 0.2928129732608795, "step": 2240 }, { "epoch": 0.22842283718499445, "loss_breakdown/lm_loss": 1.739127401378937e-05, "loss_breakdown/pointer_loss": 0.3231266736984253, "step": 2240 }, { "epoch": 0.22842283718499445, "loss_breakdown/lm_loss": 1.8842238205252215e-05, "loss_breakdown/pointer_loss": 0.3483178913593292, "step": 2240 }, { "epoch": 0.22842283718499445, "loss_breakdown/lm_loss": 1.8488994101062417e-05, "loss_breakdown/pointer_loss": 0.24584689736366272, "step": 2240 }, { "epoch": 0.22842283718499445, "loss_breakdown/lm_loss": 2.0750750991282985e-05, "loss_breakdown/pointer_loss": 0.10675995796918869, "step": 2240 }, { "epoch": 0.22842283718499445, "loss_breakdown/lm_loss": 1.6175643395399675e-05, "loss_breakdown/pointer_loss": 0.414223313331604, "step": 2240 }, { "epoch": 0.22944258199385603, "grad_norm": 8.716317528378779, "learning_rate": 4.28158640226629e-06, "loss": 0.3173, "step": 2250 }, { "epoch": 0.22944258199385603, "loss_breakdown/lm_loss": 6.290210149018094e-05, "loss_breakdown/pointer_loss": 2.6820530891418457, "step": 2250 }, { "epoch": 0.22944258199385603, "loss_breakdown/lm_loss": 6.498720176750794e-05, "loss_breakdown/pointer_loss": 1.2907460927963257, "step": 2250 }, { "epoch": 0.22944258199385603, "loss_breakdown/lm_loss": 5.256459189695306e-05, "loss_breakdown/pointer_loss": 0.8254290223121643, "step": 2250 }, { "epoch": 0.22944258199385603, "loss_breakdown/lm_loss": 4.97808177897241e-05, "loss_breakdown/pointer_loss": 0.4973626732826233, "step": 2250 }, { "epoch": 0.22944258199385603, "loss_breakdown/lm_loss": 2.464946919644717e-05, "loss_breakdown/pointer_loss": 1.1192351579666138, "step": 2250 }, { "epoch": 0.22944258199385603, "loss_breakdown/lm_loss": 2.4723793103476055e-05, "loss_breakdown/pointer_loss": 1.0891456604003906, "step": 2250 }, { "epoch": 0.22944258199385603, "loss_breakdown/lm_loss": 3.224434476578608e-05, "loss_breakdown/pointer_loss": 0.6589121222496033, "step": 2250 }, { "epoch": 0.22944258199385603, "loss_breakdown/lm_loss": 2.810059959301725e-05, "loss_breakdown/pointer_loss": 1.19580078125, "step": 2250 }, { "epoch": 0.2304623268027176, "grad_norm": 9.208780625633432, "learning_rate": 4.275920679886686e-06, "loss": 0.3273, "step": 2260 }, { "epoch": 0.2304623268027176, "loss_breakdown/lm_loss": 2.246321673737839e-05, "loss_breakdown/pointer_loss": 1.8418539762496948, "step": 2260 }, { "epoch": 0.2304623268027176, "loss_breakdown/lm_loss": 1.7017771824612282e-05, "loss_breakdown/pointer_loss": 0.42669281363487244, "step": 2260 }, { "epoch": 0.2304623268027176, "loss_breakdown/lm_loss": 2.072721690637991e-05, "loss_breakdown/pointer_loss": 0.2832944095134735, "step": 2260 }, { "epoch": 0.2304623268027176, "loss_breakdown/lm_loss": 1.9481156414258294e-05, "loss_breakdown/pointer_loss": 0.30127090215682983, "step": 2260 }, { "epoch": 0.2304623268027176, "loss_breakdown/lm_loss": 2.136602961400058e-05, "loss_breakdown/pointer_loss": 0.2999873161315918, "step": 2260 }, { "epoch": 0.2304623268027176, "loss_breakdown/lm_loss": 2.5207464204868302e-05, "loss_breakdown/pointer_loss": 0.39742299914360046, "step": 2260 }, { "epoch": 0.2304623268027176, "loss_breakdown/lm_loss": 1.4970271877245978e-05, "loss_breakdown/pointer_loss": 0.24958987534046173, "step": 2260 }, { "epoch": 0.2304623268027176, "loss_breakdown/lm_loss": 3.0821840482531115e-05, "loss_breakdown/pointer_loss": 0.5482963919639587, "step": 2260 }, { "epoch": 0.23148207161157922, "grad_norm": 3.824099026991032, "learning_rate": 4.270254957507083e-06, "loss": 0.3289, "step": 2270 }, { "epoch": 0.23148207161157922, "loss_breakdown/lm_loss": 2.6376505047664978e-05, "loss_breakdown/pointer_loss": 0.5477230548858643, "step": 2270 }, { "epoch": 0.23148207161157922, "loss_breakdown/lm_loss": 2.8948339604539797e-05, "loss_breakdown/pointer_loss": 0.6186500191688538, "step": 2270 }, { "epoch": 0.23148207161157922, "loss_breakdown/lm_loss": 2.918291647802107e-05, "loss_breakdown/pointer_loss": 0.2797675132751465, "step": 2270 }, { "epoch": 0.23148207161157922, "loss_breakdown/lm_loss": 2.0817911718040705e-05, "loss_breakdown/pointer_loss": 0.5346931219100952, "step": 2270 }, { "epoch": 0.23148207161157922, "loss_breakdown/lm_loss": 2.1799949536216445e-05, "loss_breakdown/pointer_loss": 0.24916645884513855, "step": 2270 }, { "epoch": 0.23148207161157922, "loss_breakdown/lm_loss": 2.4583203412475996e-05, "loss_breakdown/pointer_loss": 0.4076835513114929, "step": 2270 }, { "epoch": 0.23148207161157922, "loss_breakdown/lm_loss": 2.5153514798148535e-05, "loss_breakdown/pointer_loss": 0.6182518601417542, "step": 2270 }, { "epoch": 0.23148207161157922, "loss_breakdown/lm_loss": 3.072834078921005e-05, "loss_breakdown/pointer_loss": 0.22978045046329498, "step": 2270 }, { "epoch": 0.2325018164204408, "grad_norm": 14.07138651370327, "learning_rate": 4.264589235127479e-06, "loss": 0.3241, "step": 2280 }, { "epoch": 0.2325018164204408, "loss_breakdown/lm_loss": 7.23143748473376e-05, "loss_breakdown/pointer_loss": 0.4410143792629242, "step": 2280 }, { "epoch": 0.2325018164204408, "loss_breakdown/lm_loss": 2.7488400519359857e-05, "loss_breakdown/pointer_loss": 0.40158236026763916, "step": 2280 }, { "epoch": 0.2325018164204408, "loss_breakdown/lm_loss": 2.9093298508087173e-05, "loss_breakdown/pointer_loss": 2.3122270107269287, "step": 2280 }, { "epoch": 0.2325018164204408, "loss_breakdown/lm_loss": 1.811511356208939e-05, "loss_breakdown/pointer_loss": 0.13109827041625977, "step": 2280 }, { "epoch": 0.2325018164204408, "loss_breakdown/lm_loss": 2.4460670829284936e-05, "loss_breakdown/pointer_loss": 0.20725369453430176, "step": 2280 }, { "epoch": 0.2325018164204408, "loss_breakdown/lm_loss": 2.928802314272616e-05, "loss_breakdown/pointer_loss": 0.43894508481025696, "step": 2280 }, { "epoch": 0.2325018164204408, "loss_breakdown/lm_loss": 1.64821685757488e-05, "loss_breakdown/pointer_loss": 0.5444533824920654, "step": 2280 }, { "epoch": 0.2325018164204408, "loss_breakdown/lm_loss": 2.1945626940578222e-05, "loss_breakdown/pointer_loss": 0.6351507902145386, "step": 2280 }, { "epoch": 0.23352156122930237, "grad_norm": 3.891723445645933, "learning_rate": 4.258923512747876e-06, "loss": 0.314, "step": 2290 }, { "epoch": 0.23352156122930237, "loss_breakdown/lm_loss": 1.6160280210897326e-05, "loss_breakdown/pointer_loss": 0.2669829726219177, "step": 2290 }, { "epoch": 0.23352156122930237, "loss_breakdown/lm_loss": 2.1727310013375245e-05, "loss_breakdown/pointer_loss": 0.20604906976222992, "step": 2290 }, { "epoch": 0.23352156122930237, "loss_breakdown/lm_loss": 2.1989246306475252e-05, "loss_breakdown/pointer_loss": 0.42822426557540894, "step": 2290 }, { "epoch": 0.23352156122930237, "loss_breakdown/lm_loss": 2.010902426263783e-05, "loss_breakdown/pointer_loss": 0.777897298336029, "step": 2290 }, { "epoch": 0.23352156122930237, "loss_breakdown/lm_loss": 2.668986599019263e-05, "loss_breakdown/pointer_loss": 0.3136713206768036, "step": 2290 }, { "epoch": 0.23352156122930237, "loss_breakdown/lm_loss": 1.8964541595778428e-05, "loss_breakdown/pointer_loss": 0.255003958940506, "step": 2290 }, { "epoch": 0.23352156122930237, "loss_breakdown/lm_loss": 2.4066730475169607e-05, "loss_breakdown/pointer_loss": 0.23228585720062256, "step": 2290 }, { "epoch": 0.23352156122930237, "loss_breakdown/lm_loss": 2.0410499928402714e-05, "loss_breakdown/pointer_loss": 0.2832604944705963, "step": 2290 }, { "epoch": 0.23454130603816395, "grad_norm": 15.51580277362558, "learning_rate": 4.253257790368273e-06, "loss": 0.3038, "step": 2300 }, { "epoch": 0.23454130603816395, "loss_breakdown/lm_loss": 5.5904518376337364e-05, "loss_breakdown/pointer_loss": 2.0531527996063232, "step": 2300 }, { "epoch": 0.23454130603816395, "loss_breakdown/lm_loss": 3.376426320755854e-05, "loss_breakdown/pointer_loss": 0.8406360745429993, "step": 2300 }, { "epoch": 0.23454130603816395, "loss_breakdown/lm_loss": 4.083027670276351e-05, "loss_breakdown/pointer_loss": 1.621599793434143, "step": 2300 }, { "epoch": 0.23454130603816395, "loss_breakdown/lm_loss": 2.1146128347027116e-05, "loss_breakdown/pointer_loss": 0.27434131503105164, "step": 2300 }, { "epoch": 0.23454130603816395, "loss_breakdown/lm_loss": 2.3294387574424036e-05, "loss_breakdown/pointer_loss": 0.8684746623039246, "step": 2300 }, { "epoch": 0.23454130603816395, "loss_breakdown/lm_loss": 2.2835880372440442e-05, "loss_breakdown/pointer_loss": 2.1263561248779297, "step": 2300 }, { "epoch": 0.23454130603816395, "loss_breakdown/lm_loss": 2.438210321997758e-05, "loss_breakdown/pointer_loss": 0.8344188928604126, "step": 2300 }, { "epoch": 0.23454130603816395, "loss_breakdown/lm_loss": 2.0403775124577805e-05, "loss_breakdown/pointer_loss": 0.6064655780792236, "step": 2300 }, { "epoch": 0.23556105084702553, "grad_norm": 7.221642046824264, "learning_rate": 4.247592067988669e-06, "loss": 0.327, "step": 2310 }, { "epoch": 0.23556105084702553, "loss_breakdown/lm_loss": 1.969259392353706e-05, "loss_breakdown/pointer_loss": 0.622750997543335, "step": 2310 }, { "epoch": 0.23556105084702553, "loss_breakdown/lm_loss": 2.781487455649767e-05, "loss_breakdown/pointer_loss": 0.3033252954483032, "step": 2310 }, { "epoch": 0.23556105084702553, "loss_breakdown/lm_loss": 2.3427051928592846e-05, "loss_breakdown/pointer_loss": 0.36773669719696045, "step": 2310 }, { "epoch": 0.23556105084702553, "loss_breakdown/lm_loss": 1.5955032722558826e-05, "loss_breakdown/pointer_loss": 0.3609793484210968, "step": 2310 }, { "epoch": 0.23556105084702553, "loss_breakdown/lm_loss": 2.4007462343433872e-05, "loss_breakdown/pointer_loss": 0.6446061134338379, "step": 2310 }, { "epoch": 0.23556105084702553, "loss_breakdown/lm_loss": 1.9096660253126174e-05, "loss_breakdown/pointer_loss": 2.0157744884490967, "step": 2310 }, { "epoch": 0.23556105084702553, "loss_breakdown/lm_loss": 3.875592665281147e-05, "loss_breakdown/pointer_loss": 0.3674110472202301, "step": 2310 }, { "epoch": 0.23556105084702553, "loss_breakdown/lm_loss": 1.8743041437119246e-05, "loss_breakdown/pointer_loss": 0.15503954887390137, "step": 2310 }, { "epoch": 0.2365807956558871, "grad_norm": 2.9869335912410144, "learning_rate": 4.241926345609065e-06, "loss": 0.3065, "step": 2320 }, { "epoch": 0.2365807956558871, "loss_breakdown/lm_loss": 2.492933526809793e-05, "loss_breakdown/pointer_loss": 0.3086988627910614, "step": 2320 }, { "epoch": 0.2365807956558871, "loss_breakdown/lm_loss": 2.559924359957222e-05, "loss_breakdown/pointer_loss": 0.3412434458732605, "step": 2320 }, { "epoch": 0.2365807956558871, "loss_breakdown/lm_loss": 2.429258165648207e-05, "loss_breakdown/pointer_loss": 0.6908311247825623, "step": 2320 }, { "epoch": 0.2365807956558871, "loss_breakdown/lm_loss": 2.046191912086215e-05, "loss_breakdown/pointer_loss": 0.5277482867240906, "step": 2320 }, { "epoch": 0.2365807956558871, "loss_breakdown/lm_loss": 1.7489248421043158e-05, "loss_breakdown/pointer_loss": 0.321267306804657, "step": 2320 }, { "epoch": 0.2365807956558871, "loss_breakdown/lm_loss": 2.6452884412719868e-05, "loss_breakdown/pointer_loss": 0.29341602325439453, "step": 2320 }, { "epoch": 0.2365807956558871, "loss_breakdown/lm_loss": 3.8267953641479835e-05, "loss_breakdown/pointer_loss": 0.618036687374115, "step": 2320 }, { "epoch": 0.2365807956558871, "loss_breakdown/lm_loss": 2.501846756786108e-05, "loss_breakdown/pointer_loss": 0.618661105632782, "step": 2320 }, { "epoch": 0.23760054046474868, "grad_norm": 5.757638181753103, "learning_rate": 4.2362606232294625e-06, "loss": 0.2929, "step": 2330 }, { "epoch": 0.23760054046474868, "loss_breakdown/lm_loss": 4.8862872063182294e-05, "loss_breakdown/pointer_loss": 2.2049238681793213, "step": 2330 }, { "epoch": 0.23760054046474868, "loss_breakdown/lm_loss": 4.2406507418490946e-05, "loss_breakdown/pointer_loss": 0.11740268766880035, "step": 2330 }, { "epoch": 0.23760054046474868, "loss_breakdown/lm_loss": 3.144105721730739e-05, "loss_breakdown/pointer_loss": 0.08886952698230743, "step": 2330 }, { "epoch": 0.23760054046474868, "loss_breakdown/lm_loss": 4.63820542790927e-05, "loss_breakdown/pointer_loss": 1.2337764501571655, "step": 2330 }, { "epoch": 0.23760054046474868, "loss_breakdown/lm_loss": 2.6975236323778518e-05, "loss_breakdown/pointer_loss": 0.15767139196395874, "step": 2330 }, { "epoch": 0.23760054046474868, "loss_breakdown/lm_loss": 2.3618007617187686e-05, "loss_breakdown/pointer_loss": 0.41596195101737976, "step": 2330 }, { "epoch": 0.23760054046474868, "loss_breakdown/lm_loss": 1.6279507690342143e-05, "loss_breakdown/pointer_loss": 0.20562613010406494, "step": 2330 }, { "epoch": 0.23760054046474868, "loss_breakdown/lm_loss": 2.0674064217018895e-05, "loss_breakdown/pointer_loss": 0.3930025100708008, "step": 2330 }, { "epoch": 0.2386202852736103, "grad_norm": 7.036989332296205, "learning_rate": 4.230594900849859e-06, "loss": 0.3084, "step": 2340 }, { "epoch": 0.2386202852736103, "loss_breakdown/lm_loss": 2.705046790651977e-05, "loss_breakdown/pointer_loss": 0.17381101846694946, "step": 2340 }, { "epoch": 0.2386202852736103, "loss_breakdown/lm_loss": 2.0991985365981236e-05, "loss_breakdown/pointer_loss": 0.190041184425354, "step": 2340 }, { "epoch": 0.2386202852736103, "loss_breakdown/lm_loss": 2.662244878592901e-05, "loss_breakdown/pointer_loss": 0.4117056429386139, "step": 2340 }, { "epoch": 0.2386202852736103, "loss_breakdown/lm_loss": 2.1454045054269955e-05, "loss_breakdown/pointer_loss": 1.2827812433242798, "step": 2340 }, { "epoch": 0.2386202852736103, "loss_breakdown/lm_loss": 2.1301933884387836e-05, "loss_breakdown/pointer_loss": 0.5178279876708984, "step": 2340 }, { "epoch": 0.2386202852736103, "loss_breakdown/lm_loss": 2.052832132903859e-05, "loss_breakdown/pointer_loss": 0.12689250707626343, "step": 2340 }, { "epoch": 0.2386202852736103, "loss_breakdown/lm_loss": 2.8295185984461568e-05, "loss_breakdown/pointer_loss": 0.4515313506126404, "step": 2340 }, { "epoch": 0.2386202852736103, "loss_breakdown/lm_loss": 2.8841863240813836e-05, "loss_breakdown/pointer_loss": 0.37269142270088196, "step": 2340 }, { "epoch": 0.23964003008247187, "grad_norm": 11.573956464124821, "learning_rate": 4.2249291784702555e-06, "loss": 0.2669, "step": 2350 }, { "epoch": 0.23964003008247187, "loss_breakdown/lm_loss": 7.49090249883011e-05, "loss_breakdown/pointer_loss": 1.1104494333267212, "step": 2350 }, { "epoch": 0.23964003008247187, "loss_breakdown/lm_loss": 2.9545237339334562e-05, "loss_breakdown/pointer_loss": 0.7399559020996094, "step": 2350 }, { "epoch": 0.23964003008247187, "loss_breakdown/lm_loss": 3.446074697421864e-05, "loss_breakdown/pointer_loss": 0.6523722410202026, "step": 2350 }, { "epoch": 0.23964003008247187, "loss_breakdown/lm_loss": 2.690650762815494e-05, "loss_breakdown/pointer_loss": 0.7917245030403137, "step": 2350 }, { "epoch": 0.23964003008247187, "loss_breakdown/lm_loss": 2.3022001187200658e-05, "loss_breakdown/pointer_loss": 0.27709856629371643, "step": 2350 }, { "epoch": 0.23964003008247187, "loss_breakdown/lm_loss": 2.608145041449461e-05, "loss_breakdown/pointer_loss": 0.3834482431411743, "step": 2350 }, { "epoch": 0.23964003008247187, "loss_breakdown/lm_loss": 1.7434238543501124e-05, "loss_breakdown/pointer_loss": 0.27961617708206177, "step": 2350 }, { "epoch": 0.23964003008247187, "loss_breakdown/lm_loss": 2.9689726943615824e-05, "loss_breakdown/pointer_loss": 0.6384652853012085, "step": 2350 }, { "epoch": 0.24065977489133344, "grad_norm": 4.463257406276241, "learning_rate": 4.219263456090652e-06, "loss": 0.2976, "step": 2360 }, { "epoch": 0.24065977489133344, "loss_breakdown/lm_loss": 1.7338321413262747e-05, "loss_breakdown/pointer_loss": 0.3452918529510498, "step": 2360 }, { "epoch": 0.24065977489133344, "loss_breakdown/lm_loss": 2.2154945327201858e-05, "loss_breakdown/pointer_loss": 1.0912175178527832, "step": 2360 }, { "epoch": 0.24065977489133344, "loss_breakdown/lm_loss": 2.691738336579874e-05, "loss_breakdown/pointer_loss": 0.38641107082366943, "step": 2360 }, { "epoch": 0.24065977489133344, "loss_breakdown/lm_loss": 1.9749983039218932e-05, "loss_breakdown/pointer_loss": 2.724355697631836, "step": 2360 }, { "epoch": 0.24065977489133344, "loss_breakdown/lm_loss": 2.10534235520754e-05, "loss_breakdown/pointer_loss": 0.35469335317611694, "step": 2360 }, { "epoch": 0.24065977489133344, "loss_breakdown/lm_loss": 1.9231725673307665e-05, "loss_breakdown/pointer_loss": 0.321174681186676, "step": 2360 }, { "epoch": 0.24065977489133344, "loss_breakdown/lm_loss": 2.790936196106486e-05, "loss_breakdown/pointer_loss": 0.44336530566215515, "step": 2360 }, { "epoch": 0.24065977489133344, "loss_breakdown/lm_loss": 1.8410588381811976e-05, "loss_breakdown/pointer_loss": 0.35590434074401855, "step": 2360 }, { "epoch": 0.24167951970019502, "grad_norm": 3.5540414687824358, "learning_rate": 4.2135977337110485e-06, "loss": 0.3306, "step": 2370 }, { "epoch": 0.24167951970019502, "loss_breakdown/lm_loss": 1.4926567928341683e-05, "loss_breakdown/pointer_loss": 0.7054252028465271, "step": 2370 }, { "epoch": 0.24167951970019502, "loss_breakdown/lm_loss": 1.7650530935497954e-05, "loss_breakdown/pointer_loss": 0.44890838861465454, "step": 2370 }, { "epoch": 0.24167951970019502, "loss_breakdown/lm_loss": 2.8447588192648254e-05, "loss_breakdown/pointer_loss": 0.8477211594581604, "step": 2370 }, { "epoch": 0.24167951970019502, "loss_breakdown/lm_loss": 1.3994297660246957e-05, "loss_breakdown/pointer_loss": 0.44670435786247253, "step": 2370 }, { "epoch": 0.24167951970019502, "loss_breakdown/lm_loss": 1.7379379642079584e-05, "loss_breakdown/pointer_loss": 0.5800955295562744, "step": 2370 }, { "epoch": 0.24167951970019502, "loss_breakdown/lm_loss": 1.6235775547102094e-05, "loss_breakdown/pointer_loss": 0.1876325011253357, "step": 2370 }, { "epoch": 0.24167951970019502, "loss_breakdown/lm_loss": 2.3706737920292653e-05, "loss_breakdown/pointer_loss": 0.5044724941253662, "step": 2370 }, { "epoch": 0.24167951970019502, "loss_breakdown/lm_loss": 1.5196191270661075e-05, "loss_breakdown/pointer_loss": 0.48675525188446045, "step": 2370 }, { "epoch": 0.2426992645090566, "grad_norm": 6.451452044025877, "learning_rate": 4.207932011331445e-06, "loss": 0.3006, "step": 2380 }, { "epoch": 0.2426992645090566, "loss_breakdown/lm_loss": 1.8039803762803786e-05, "loss_breakdown/pointer_loss": 1.0627604722976685, "step": 2380 }, { "epoch": 0.2426992645090566, "loss_breakdown/lm_loss": 1.8492664821678773e-05, "loss_breakdown/pointer_loss": 0.11833631247282028, "step": 2380 }, { "epoch": 0.2426992645090566, "loss_breakdown/lm_loss": 2.242229675175622e-05, "loss_breakdown/pointer_loss": 0.15782392024993896, "step": 2380 }, { "epoch": 0.2426992645090566, "loss_breakdown/lm_loss": 2.4075352484942414e-05, "loss_breakdown/pointer_loss": 2.6713826656341553, "step": 2380 }, { "epoch": 0.2426992645090566, "loss_breakdown/lm_loss": 2.068206413241569e-05, "loss_breakdown/pointer_loss": 0.27470090985298157, "step": 2380 }, { "epoch": 0.2426992645090566, "loss_breakdown/lm_loss": 2.6743809939944185e-05, "loss_breakdown/pointer_loss": 0.16281144320964813, "step": 2380 }, { "epoch": 0.2426992645090566, "loss_breakdown/lm_loss": 4.038748375023715e-05, "loss_breakdown/pointer_loss": 1.5921356678009033, "step": 2380 }, { "epoch": 0.2426992645090566, "loss_breakdown/lm_loss": 1.9342820451129228e-05, "loss_breakdown/pointer_loss": 0.07995201647281647, "step": 2380 }, { "epoch": 0.24371900931791818, "grad_norm": 1.9565886113397208, "learning_rate": 4.2022662889518415e-06, "loss": 0.2898, "step": 2390 }, { "epoch": 0.24371900931791818, "loss_breakdown/lm_loss": 2.0006211343570612e-05, "loss_breakdown/pointer_loss": 2.923232316970825, "step": 2390 }, { "epoch": 0.24371900931791818, "loss_breakdown/lm_loss": 2.1387673768913373e-05, "loss_breakdown/pointer_loss": 0.19547703862190247, "step": 2390 }, { "epoch": 0.24371900931791818, "loss_breakdown/lm_loss": 2.118725751643069e-05, "loss_breakdown/pointer_loss": 0.8799545764923096, "step": 2390 }, { "epoch": 0.24371900931791818, "loss_breakdown/lm_loss": 1.716082806524355e-05, "loss_breakdown/pointer_loss": 0.055132679641246796, "step": 2390 }, { "epoch": 0.24371900931791818, "loss_breakdown/lm_loss": 1.7363165170536377e-05, "loss_breakdown/pointer_loss": 0.34357333183288574, "step": 2390 }, { "epoch": 0.24371900931791818, "loss_breakdown/lm_loss": 1.7207650671480224e-05, "loss_breakdown/pointer_loss": 0.4262070953845978, "step": 2390 }, { "epoch": 0.24371900931791818, "loss_breakdown/lm_loss": 1.7377198673784733e-05, "loss_breakdown/pointer_loss": 0.2638242542743683, "step": 2390 }, { "epoch": 0.24371900931791818, "loss_breakdown/lm_loss": 2.3368600523099303e-05, "loss_breakdown/pointer_loss": 0.23832383751869202, "step": 2390 }, { "epoch": 0.24473875412677978, "grad_norm": 7.293618246183232, "learning_rate": 4.196600566572238e-06, "loss": 0.2979, "step": 2400 }, { "epoch": 0.24473875412677978, "loss_breakdown/lm_loss": 9.610949928173795e-05, "loss_breakdown/pointer_loss": 1.0573184490203857, "step": 2400 }, { "epoch": 0.24473875412677978, "loss_breakdown/lm_loss": 0.00010309959179721773, "loss_breakdown/pointer_loss": 1.2295804023742676, "step": 2400 }, { "epoch": 0.24473875412677978, "loss_breakdown/lm_loss": 4.852934944210574e-05, "loss_breakdown/pointer_loss": 0.595096230506897, "step": 2400 }, { "epoch": 0.24473875412677978, "loss_breakdown/lm_loss": 4.7423622163478285e-05, "loss_breakdown/pointer_loss": 0.699101448059082, "step": 2400 }, { "epoch": 0.24473875412677978, "loss_breakdown/lm_loss": 3.3663425710983574e-05, "loss_breakdown/pointer_loss": 0.7030513286590576, "step": 2400 }, { "epoch": 0.24473875412677978, "loss_breakdown/lm_loss": 4.190756953903474e-05, "loss_breakdown/pointer_loss": 0.3978368937969208, "step": 2400 }, { "epoch": 0.24473875412677978, "loss_breakdown/lm_loss": 2.6134819563594647e-05, "loss_breakdown/pointer_loss": 0.3298720717430115, "step": 2400 }, { "epoch": 0.24473875412677978, "loss_breakdown/lm_loss": 3.12167830998078e-05, "loss_breakdown/pointer_loss": 0.44371604919433594, "step": 2400 }, { "epoch": 0.24575849893564136, "grad_norm": 4.126749299581471, "learning_rate": 4.190934844192635e-06, "loss": 0.3134, "step": 2410 }, { "epoch": 0.24575849893564136, "loss_breakdown/lm_loss": 2.015518293774221e-05, "loss_breakdown/pointer_loss": 0.2063368558883667, "step": 2410 }, { "epoch": 0.24575849893564136, "loss_breakdown/lm_loss": 1.4605387150368188e-05, "loss_breakdown/pointer_loss": 0.22656583786010742, "step": 2410 }, { "epoch": 0.24575849893564136, "loss_breakdown/lm_loss": 2.4131119062076323e-05, "loss_breakdown/pointer_loss": 0.30768030881881714, "step": 2410 }, { "epoch": 0.24575849893564136, "loss_breakdown/lm_loss": 1.7247048162971623e-05, "loss_breakdown/pointer_loss": 0.11802825331687927, "step": 2410 }, { "epoch": 0.24575849893564136, "loss_breakdown/lm_loss": 2.3929151211632416e-05, "loss_breakdown/pointer_loss": 0.18466264009475708, "step": 2410 }, { "epoch": 0.24575849893564136, "loss_breakdown/lm_loss": 1.9091366993961856e-05, "loss_breakdown/pointer_loss": 0.7401946187019348, "step": 2410 }, { "epoch": 0.24575849893564136, "loss_breakdown/lm_loss": 2.535818566684611e-05, "loss_breakdown/pointer_loss": 1.9039852619171143, "step": 2410 }, { "epoch": 0.24575849893564136, "loss_breakdown/lm_loss": 2.637317265907768e-05, "loss_breakdown/pointer_loss": 0.1776263266801834, "step": 2410 }, { "epoch": 0.24677824374450294, "grad_norm": 3.3467447811456634, "learning_rate": 4.185269121813032e-06, "loss": 0.3138, "step": 2420 }, { "epoch": 0.24677824374450294, "loss_breakdown/lm_loss": 1.6414343917858787e-05, "loss_breakdown/pointer_loss": 0.5387917160987854, "step": 2420 }, { "epoch": 0.24677824374450294, "loss_breakdown/lm_loss": 2.3708580556558445e-05, "loss_breakdown/pointer_loss": 0.18492767214775085, "step": 2420 }, { "epoch": 0.24677824374450294, "loss_breakdown/lm_loss": 1.6899612091947347e-05, "loss_breakdown/pointer_loss": 0.2846049964427948, "step": 2420 }, { "epoch": 0.24677824374450294, "loss_breakdown/lm_loss": 1.648744182602968e-05, "loss_breakdown/pointer_loss": 0.6274468898773193, "step": 2420 }, { "epoch": 0.24677824374450294, "loss_breakdown/lm_loss": 1.7817990737967193e-05, "loss_breakdown/pointer_loss": 0.25672563910484314, "step": 2420 }, { "epoch": 0.24677824374450294, "loss_breakdown/lm_loss": 3.7040521419839934e-05, "loss_breakdown/pointer_loss": 0.999610185623169, "step": 2420 }, { "epoch": 0.24677824374450294, "loss_breakdown/lm_loss": 2.066043089143932e-05, "loss_breakdown/pointer_loss": 0.4580972492694855, "step": 2420 }, { "epoch": 0.24677824374450294, "loss_breakdown/lm_loss": 2.2087644538260065e-05, "loss_breakdown/pointer_loss": 0.6725918650627136, "step": 2420 }, { "epoch": 0.24779798855336452, "grad_norm": 9.845568163447194, "learning_rate": 4.1796033994334275e-06, "loss": 0.2907, "step": 2430 }, { "epoch": 0.24779798855336452, "loss_breakdown/lm_loss": 3.324551653349772e-05, "loss_breakdown/pointer_loss": 0.2429044246673584, "step": 2430 }, { "epoch": 0.24779798855336452, "loss_breakdown/lm_loss": 1.875097950687632e-05, "loss_breakdown/pointer_loss": 0.1456272155046463, "step": 2430 }, { "epoch": 0.24779798855336452, "loss_breakdown/lm_loss": 2.1416724848677404e-05, "loss_breakdown/pointer_loss": 0.11778485774993896, "step": 2430 }, { "epoch": 0.24779798855336452, "loss_breakdown/lm_loss": 3.0161796530592255e-05, "loss_breakdown/pointer_loss": 1.2467079162597656, "step": 2430 }, { "epoch": 0.24779798855336452, "loss_breakdown/lm_loss": 0.00018143159104511142, "loss_breakdown/pointer_loss": 0.15656828880310059, "step": 2430 }, { "epoch": 0.24779798855336452, "loss_breakdown/lm_loss": 1.4058343367651105e-05, "loss_breakdown/pointer_loss": 0.11246176064014435, "step": 2430 }, { "epoch": 0.24779798855336452, "loss_breakdown/lm_loss": 1.9513685401761904e-05, "loss_breakdown/pointer_loss": 0.36073678731918335, "step": 2430 }, { "epoch": 0.24779798855336452, "loss_breakdown/lm_loss": 2.1281823137542233e-05, "loss_breakdown/pointer_loss": 0.27943676710128784, "step": 2430 }, { "epoch": 0.2488177333622261, "grad_norm": 2.2431071199509858, "learning_rate": 4.173937677053825e-06, "loss": 0.3185, "step": 2440 }, { "epoch": 0.2488177333622261, "loss_breakdown/lm_loss": 1.6411167962360196e-05, "loss_breakdown/pointer_loss": 0.5252088308334351, "step": 2440 }, { "epoch": 0.2488177333622261, "loss_breakdown/lm_loss": 2.2588799765799195e-05, "loss_breakdown/pointer_loss": 0.450081467628479, "step": 2440 }, { "epoch": 0.2488177333622261, "loss_breakdown/lm_loss": 2.2095877284300514e-05, "loss_breakdown/pointer_loss": 0.34138816595077515, "step": 2440 }, { "epoch": 0.2488177333622261, "loss_breakdown/lm_loss": 1.847199564508628e-05, "loss_breakdown/pointer_loss": 0.16195490956306458, "step": 2440 }, { "epoch": 0.2488177333622261, "loss_breakdown/lm_loss": 2.4549597583245486e-05, "loss_breakdown/pointer_loss": 0.19350558519363403, "step": 2440 }, { "epoch": 0.2488177333622261, "loss_breakdown/lm_loss": 5.7134038797812536e-05, "loss_breakdown/pointer_loss": 0.5040578842163086, "step": 2440 }, { "epoch": 0.2488177333622261, "loss_breakdown/lm_loss": 1.6793555914773606e-05, "loss_breakdown/pointer_loss": 0.21713212132453918, "step": 2440 }, { "epoch": 0.2488177333622261, "loss_breakdown/lm_loss": 2.4318635041709058e-05, "loss_breakdown/pointer_loss": 0.14750970900058746, "step": 2440 }, { "epoch": 0.24983747817108767, "grad_norm": 6.515041349620655, "learning_rate": 4.168271954674221e-06, "loss": 0.3042, "step": 2450 }, { "epoch": 0.24983747817108767, "loss_breakdown/lm_loss": 0.00010435167496325448, "loss_breakdown/pointer_loss": 1.5051265954971313, "step": 2450 }, { "epoch": 0.24983747817108767, "loss_breakdown/lm_loss": 5.900255200685933e-05, "loss_breakdown/pointer_loss": 0.6747037172317505, "step": 2450 }, { "epoch": 0.24983747817108767, "loss_breakdown/lm_loss": 0.0001373397681163624, "loss_breakdown/pointer_loss": 1.3622406721115112, "step": 2450 }, { "epoch": 0.24983747817108767, "loss_breakdown/lm_loss": 3.176744212396443e-05, "loss_breakdown/pointer_loss": 0.7372121810913086, "step": 2450 }, { "epoch": 0.24983747817108767, "loss_breakdown/lm_loss": 0.0001536859490443021, "loss_breakdown/pointer_loss": 0.5282818078994751, "step": 2450 }, { "epoch": 0.24983747817108767, "loss_breakdown/lm_loss": 3.6649613321060315e-05, "loss_breakdown/pointer_loss": 0.4713090658187866, "step": 2450 }, { "epoch": 0.24983747817108767, "loss_breakdown/lm_loss": 7.961892697494477e-05, "loss_breakdown/pointer_loss": 0.7701377868652344, "step": 2450 }, { "epoch": 0.24983747817108767, "loss_breakdown/lm_loss": 2.178703471145127e-05, "loss_breakdown/pointer_loss": 0.5413084030151367, "step": 2450 }, { "epoch": 0.2508572229799493, "grad_norm": 2.0675145540822157, "learning_rate": 4.162606232294618e-06, "loss": 0.3292, "step": 2460 }, { "epoch": 0.2508572229799493, "loss_breakdown/lm_loss": 1.9818746295641176e-05, "loss_breakdown/pointer_loss": 0.1865171492099762, "step": 2460 }, { "epoch": 0.2508572229799493, "loss_breakdown/lm_loss": 2.029919414781034e-05, "loss_breakdown/pointer_loss": 0.46407437324523926, "step": 2460 }, { "epoch": 0.2508572229799493, "loss_breakdown/lm_loss": 1.8949729565065354e-05, "loss_breakdown/pointer_loss": 2.308514356613159, "step": 2460 }, { "epoch": 0.2508572229799493, "loss_breakdown/lm_loss": 1.5492780221393332e-05, "loss_breakdown/pointer_loss": 0.27161991596221924, "step": 2460 }, { "epoch": 0.2508572229799493, "loss_breakdown/lm_loss": 2.0182975276838988e-05, "loss_breakdown/pointer_loss": 0.8430375456809998, "step": 2460 }, { "epoch": 0.2508572229799493, "loss_breakdown/lm_loss": 2.6244726541335694e-05, "loss_breakdown/pointer_loss": 0.363961398601532, "step": 2460 }, { "epoch": 0.2508572229799493, "loss_breakdown/lm_loss": 2.0768007743754424e-05, "loss_breakdown/pointer_loss": 0.34888142347335815, "step": 2460 }, { "epoch": 0.2508572229799493, "loss_breakdown/lm_loss": 1.7924257917911746e-05, "loss_breakdown/pointer_loss": 0.15118801593780518, "step": 2460 }, { "epoch": 0.25187696778881086, "grad_norm": 3.498881763508228, "learning_rate": 4.156940509915014e-06, "loss": 0.3075, "step": 2470 }, { "epoch": 0.25187696778881086, "loss_breakdown/lm_loss": 1.9017428712686524e-05, "loss_breakdown/pointer_loss": 0.6290779113769531, "step": 2470 }, { "epoch": 0.25187696778881086, "loss_breakdown/lm_loss": 3.388548793736845e-05, "loss_breakdown/pointer_loss": 0.3087170720100403, "step": 2470 }, { "epoch": 0.25187696778881086, "loss_breakdown/lm_loss": 1.6672835045028478e-05, "loss_breakdown/pointer_loss": 0.19755084812641144, "step": 2470 }, { "epoch": 0.25187696778881086, "loss_breakdown/lm_loss": 1.7486656361143105e-05, "loss_breakdown/pointer_loss": 0.5678764581680298, "step": 2470 }, { "epoch": 0.25187696778881086, "loss_breakdown/lm_loss": 3.123000715277158e-05, "loss_breakdown/pointer_loss": 0.48725950717926025, "step": 2470 }, { "epoch": 0.25187696778881086, "loss_breakdown/lm_loss": 2.0872646928182803e-05, "loss_breakdown/pointer_loss": 0.32180485129356384, "step": 2470 }, { "epoch": 0.25187696778881086, "loss_breakdown/lm_loss": 3.183032458764501e-05, "loss_breakdown/pointer_loss": 0.43487420678138733, "step": 2470 }, { "epoch": 0.25187696778881086, "loss_breakdown/lm_loss": 1.6339576177415438e-05, "loss_breakdown/pointer_loss": 0.23284192383289337, "step": 2470 }, { "epoch": 0.25289671259767244, "grad_norm": 5.579345201975561, "learning_rate": 4.151274787535411e-06, "loss": 0.2954, "step": 2480 }, { "epoch": 0.25289671259767244, "loss_breakdown/lm_loss": 2.3145574232330546e-05, "loss_breakdown/pointer_loss": 1.2097866535186768, "step": 2480 }, { "epoch": 0.25289671259767244, "loss_breakdown/lm_loss": 2.387787935731467e-05, "loss_breakdown/pointer_loss": 0.2085389345884323, "step": 2480 }, { "epoch": 0.25289671259767244, "loss_breakdown/lm_loss": 2.0030358427902684e-05, "loss_breakdown/pointer_loss": 0.33110710978507996, "step": 2480 }, { "epoch": 0.25289671259767244, "loss_breakdown/lm_loss": 3.819988705799915e-05, "loss_breakdown/pointer_loss": 0.4256719946861267, "step": 2480 }, { "epoch": 0.25289671259767244, "loss_breakdown/lm_loss": 2.281985871377401e-05, "loss_breakdown/pointer_loss": 0.45906946063041687, "step": 2480 }, { "epoch": 0.25289671259767244, "loss_breakdown/lm_loss": 1.944234099937603e-05, "loss_breakdown/pointer_loss": 0.07625450938940048, "step": 2480 }, { "epoch": 0.25289671259767244, "loss_breakdown/lm_loss": 2.1099318473716266e-05, "loss_breakdown/pointer_loss": 0.14556747674942017, "step": 2480 }, { "epoch": 0.25289671259767244, "loss_breakdown/lm_loss": 3.785505396081135e-05, "loss_breakdown/pointer_loss": 0.28951460123062134, "step": 2480 }, { "epoch": 0.253916457406534, "grad_norm": 3.961349241282406, "learning_rate": 4.145609065155808e-06, "loss": 0.3209, "step": 2490 }, { "epoch": 0.253916457406534, "loss_breakdown/lm_loss": 2.450537431286648e-05, "loss_breakdown/pointer_loss": 0.3524658679962158, "step": 2490 }, { "epoch": 0.253916457406534, "loss_breakdown/lm_loss": 2.2155862097861245e-05, "loss_breakdown/pointer_loss": 0.8440600037574768, "step": 2490 }, { "epoch": 0.253916457406534, "loss_breakdown/lm_loss": 2.4832264898577705e-05, "loss_breakdown/pointer_loss": 0.310818076133728, "step": 2490 }, { "epoch": 0.253916457406534, "loss_breakdown/lm_loss": 1.8844997612177394e-05, "loss_breakdown/pointer_loss": 0.489116370677948, "step": 2490 }, { "epoch": 0.253916457406534, "loss_breakdown/lm_loss": 2.5517772883176804e-05, "loss_breakdown/pointer_loss": 0.25569188594818115, "step": 2490 }, { "epoch": 0.253916457406534, "loss_breakdown/lm_loss": 1.7489883248344995e-05, "loss_breakdown/pointer_loss": 0.13427667319774628, "step": 2490 }, { "epoch": 0.253916457406534, "loss_breakdown/lm_loss": 1.9007286027772352e-05, "loss_breakdown/pointer_loss": 0.2663806080818176, "step": 2490 }, { "epoch": 0.253916457406534, "loss_breakdown/lm_loss": 2.5070339688681997e-05, "loss_breakdown/pointer_loss": 0.35620540380477905, "step": 2490 }, { "epoch": 0.2549362022153956, "grad_norm": 5.817761542174173, "learning_rate": 4.139943342776204e-06, "loss": 0.3078, "step": 2500 }, { "epoch": 0.2549362022153956, "loss_breakdown/lm_loss": 6.419857527362183e-05, "loss_breakdown/pointer_loss": 3.073803424835205, "step": 2500 }, { "epoch": 0.2549362022153956, "loss_breakdown/lm_loss": 6.129163375589997e-05, "loss_breakdown/pointer_loss": 2.589829921722412, "step": 2500 }, { "epoch": 0.2549362022153956, "loss_breakdown/lm_loss": 2.6087091100635007e-05, "loss_breakdown/pointer_loss": 1.476101040840149, "step": 2500 }, { "epoch": 0.2549362022153956, "loss_breakdown/lm_loss": 2.917783785960637e-05, "loss_breakdown/pointer_loss": 0.43255293369293213, "step": 2500 }, { "epoch": 0.2549362022153956, "loss_breakdown/lm_loss": 2.9479360819095746e-05, "loss_breakdown/pointer_loss": 0.3804207146167755, "step": 2500 }, { "epoch": 0.2549362022153956, "loss_breakdown/lm_loss": 2.2934083972359076e-05, "loss_breakdown/pointer_loss": 0.40150171518325806, "step": 2500 }, { "epoch": 0.2549362022153956, "loss_breakdown/lm_loss": 5.308655818225816e-05, "loss_breakdown/pointer_loss": 0.7171207070350647, "step": 2500 }, { "epoch": 0.2549362022153956, "loss_breakdown/lm_loss": 2.016481630562339e-05, "loss_breakdown/pointer_loss": 0.7341726422309875, "step": 2500 }, { "epoch": 0.25595594702425717, "grad_norm": 3.865445448669092, "learning_rate": 4.1342776203966e-06, "loss": 0.2913, "step": 2510 }, { "epoch": 0.25595594702425717, "loss_breakdown/lm_loss": 1.670632991590537e-05, "loss_breakdown/pointer_loss": 0.7778721451759338, "step": 2510 }, { "epoch": 0.25595594702425717, "loss_breakdown/lm_loss": 2.1802261471748352e-05, "loss_breakdown/pointer_loss": 0.33028194308280945, "step": 2510 }, { "epoch": 0.25595594702425717, "loss_breakdown/lm_loss": 3.059441223740578e-05, "loss_breakdown/pointer_loss": 0.1525982916355133, "step": 2510 }, { "epoch": 0.25595594702425717, "loss_breakdown/lm_loss": 2.2794616597821005e-05, "loss_breakdown/pointer_loss": 0.2430284023284912, "step": 2510 }, { "epoch": 0.25595594702425717, "loss_breakdown/lm_loss": 2.5644354536780156e-05, "loss_breakdown/pointer_loss": 0.5744352340698242, "step": 2510 }, { "epoch": 0.25595594702425717, "loss_breakdown/lm_loss": 2.552559999458026e-05, "loss_breakdown/pointer_loss": 0.6415197849273682, "step": 2510 }, { "epoch": 0.25595594702425717, "loss_breakdown/lm_loss": 2.195932211179752e-05, "loss_breakdown/pointer_loss": 1.727881669998169, "step": 2510 }, { "epoch": 0.25595594702425717, "loss_breakdown/lm_loss": 2.1417070456664078e-05, "loss_breakdown/pointer_loss": 0.2085948884487152, "step": 2510 }, { "epoch": 0.25697569183311875, "grad_norm": 2.048978308406142, "learning_rate": 4.128611898016998e-06, "loss": 0.3292, "step": 2520 }, { "epoch": 0.25697569183311875, "loss_breakdown/lm_loss": 2.197822141170036e-05, "loss_breakdown/pointer_loss": 0.3742096424102783, "step": 2520 }, { "epoch": 0.25697569183311875, "loss_breakdown/lm_loss": 1.910238279378973e-05, "loss_breakdown/pointer_loss": 0.6522914171218872, "step": 2520 }, { "epoch": 0.25697569183311875, "loss_breakdown/lm_loss": 1.4011118764756247e-05, "loss_breakdown/pointer_loss": 0.291398286819458, "step": 2520 }, { "epoch": 0.25697569183311875, "loss_breakdown/lm_loss": 1.9173023247276433e-05, "loss_breakdown/pointer_loss": 0.46481552720069885, "step": 2520 }, { "epoch": 0.25697569183311875, "loss_breakdown/lm_loss": 1.7302325431955978e-05, "loss_breakdown/pointer_loss": 0.5971368551254272, "step": 2520 }, { "epoch": 0.25697569183311875, "loss_breakdown/lm_loss": 2.0506520741037093e-05, "loss_breakdown/pointer_loss": 0.4188334047794342, "step": 2520 }, { "epoch": 0.25697569183311875, "loss_breakdown/lm_loss": 1.861748387455009e-05, "loss_breakdown/pointer_loss": 0.354288250207901, "step": 2520 }, { "epoch": 0.25697569183311875, "loss_breakdown/lm_loss": 2.2785039618611336e-05, "loss_breakdown/pointer_loss": 0.33456137776374817, "step": 2520 }, { "epoch": 0.2579954366419803, "grad_norm": 3.8897299096337403, "learning_rate": 4.122946175637394e-06, "loss": 0.2849, "step": 2530 }, { "epoch": 0.2579954366419803, "loss_breakdown/lm_loss": 2.2573400201508775e-05, "loss_breakdown/pointer_loss": 0.099807009100914, "step": 2530 }, { "epoch": 0.2579954366419803, "loss_breakdown/lm_loss": 3.238661156501621e-05, "loss_breakdown/pointer_loss": 0.23303602635860443, "step": 2530 }, { "epoch": 0.2579954366419803, "loss_breakdown/lm_loss": 2.5036737497430295e-05, "loss_breakdown/pointer_loss": 0.2273131012916565, "step": 2530 }, { "epoch": 0.2579954366419803, "loss_breakdown/lm_loss": 2.220393253082875e-05, "loss_breakdown/pointer_loss": 0.29378557205200195, "step": 2530 }, { "epoch": 0.2579954366419803, "loss_breakdown/lm_loss": 2.4194652723963372e-05, "loss_breakdown/pointer_loss": 0.37514829635620117, "step": 2530 }, { "epoch": 0.2579954366419803, "loss_breakdown/lm_loss": 2.1234332962194458e-05, "loss_breakdown/pointer_loss": 0.7184377312660217, "step": 2530 }, { "epoch": 0.2579954366419803, "loss_breakdown/lm_loss": 1.9033117496292107e-05, "loss_breakdown/pointer_loss": 0.18152305483818054, "step": 2530 }, { "epoch": 0.2579954366419803, "loss_breakdown/lm_loss": 1.9362794773769565e-05, "loss_breakdown/pointer_loss": 0.09951648861169815, "step": 2530 }, { "epoch": 0.2590151814508419, "grad_norm": 2.573037554465987, "learning_rate": 4.117280453257791e-06, "loss": 0.319, "step": 2540 }, { "epoch": 0.2590151814508419, "loss_breakdown/lm_loss": 1.874453664640896e-05, "loss_breakdown/pointer_loss": 0.30778729915618896, "step": 2540 }, { "epoch": 0.2590151814508419, "loss_breakdown/lm_loss": 1.8432994693284854e-05, "loss_breakdown/pointer_loss": 0.6700507402420044, "step": 2540 }, { "epoch": 0.2590151814508419, "loss_breakdown/lm_loss": 1.65065357577987e-05, "loss_breakdown/pointer_loss": 0.4865277111530304, "step": 2540 }, { "epoch": 0.2590151814508419, "loss_breakdown/lm_loss": 1.8247927073389292e-05, "loss_breakdown/pointer_loss": 0.6528749465942383, "step": 2540 }, { "epoch": 0.2590151814508419, "loss_breakdown/lm_loss": 1.634515865589492e-05, "loss_breakdown/pointer_loss": 0.29013705253601074, "step": 2540 }, { "epoch": 0.2590151814508419, "loss_breakdown/lm_loss": 1.4337178072310053e-05, "loss_breakdown/pointer_loss": 0.36657989025115967, "step": 2540 }, { "epoch": 0.2590151814508419, "loss_breakdown/lm_loss": 1.957957101694774e-05, "loss_breakdown/pointer_loss": 0.5267244577407837, "step": 2540 }, { "epoch": 0.2590151814508419, "loss_breakdown/lm_loss": 1.8900313079939224e-05, "loss_breakdown/pointer_loss": 0.29638364911079407, "step": 2540 }, { "epoch": 0.26003492625970354, "grad_norm": 46.08441289028426, "learning_rate": 4.111614730878187e-06, "loss": 0.3048, "step": 2550 }, { "epoch": 0.26003492625970354, "loss_breakdown/lm_loss": 6.492713873740286e-05, "loss_breakdown/pointer_loss": 2.3160653114318848, "step": 2550 }, { "epoch": 0.26003492625970354, "loss_breakdown/lm_loss": 2.8171101803309284e-05, "loss_breakdown/pointer_loss": 0.33033236861228943, "step": 2550 }, { "epoch": 0.26003492625970354, "loss_breakdown/lm_loss": 2.0031917301821522e-05, "loss_breakdown/pointer_loss": 0.6848074197769165, "step": 2550 }, { "epoch": 0.26003492625970354, "loss_breakdown/lm_loss": 2.173173015762586e-05, "loss_breakdown/pointer_loss": 0.8867232203483582, "step": 2550 }, { "epoch": 0.26003492625970354, "loss_breakdown/lm_loss": 2.601587948447559e-05, "loss_breakdown/pointer_loss": 0.36807334423065186, "step": 2550 }, { "epoch": 0.26003492625970354, "loss_breakdown/lm_loss": 2.962990947708022e-05, "loss_breakdown/pointer_loss": 1.1229476928710938, "step": 2550 }, { "epoch": 0.26003492625970354, "loss_breakdown/lm_loss": 2.1860496417502873e-05, "loss_breakdown/pointer_loss": 0.683104395866394, "step": 2550 }, { "epoch": 0.26003492625970354, "loss_breakdown/lm_loss": 2.2776317564421333e-05, "loss_breakdown/pointer_loss": 0.35441020131111145, "step": 2550 }, { "epoch": 0.2610546710685651, "grad_norm": 5.731314887261668, "learning_rate": 4.105949008498584e-06, "loss": 0.3149, "step": 2560 }, { "epoch": 0.2610546710685651, "loss_breakdown/lm_loss": 1.4265030586102512e-05, "loss_breakdown/pointer_loss": 0.15636271238327026, "step": 2560 }, { "epoch": 0.2610546710685651, "loss_breakdown/lm_loss": 1.6943762602750212e-05, "loss_breakdown/pointer_loss": 0.4055531620979309, "step": 2560 }, { "epoch": 0.2610546710685651, "loss_breakdown/lm_loss": 1.718679050100036e-05, "loss_breakdown/pointer_loss": 0.3487583100795746, "step": 2560 }, { "epoch": 0.2610546710685651, "loss_breakdown/lm_loss": 1.4785471648792736e-05, "loss_breakdown/pointer_loss": 0.24240106344223022, "step": 2560 }, { "epoch": 0.2610546710685651, "loss_breakdown/lm_loss": 2.8705597287626006e-05, "loss_breakdown/pointer_loss": 0.606195867061615, "step": 2560 }, { "epoch": 0.2610546710685651, "loss_breakdown/lm_loss": 3.74649025616236e-05, "loss_breakdown/pointer_loss": 0.2739773988723755, "step": 2560 }, { "epoch": 0.2610546710685651, "loss_breakdown/lm_loss": 1.5904734027571976e-05, "loss_breakdown/pointer_loss": 0.8852041363716125, "step": 2560 }, { "epoch": 0.2610546710685651, "loss_breakdown/lm_loss": 2.4126913558575325e-05, "loss_breakdown/pointer_loss": 3.371903657913208, "step": 2560 }, { "epoch": 0.2620744158774267, "grad_norm": 3.2493363173673844, "learning_rate": 4.10028328611898e-06, "loss": 0.3425, "step": 2570 }, { "epoch": 0.2620744158774267, "loss_breakdown/lm_loss": 2.0067765944986604e-05, "loss_breakdown/pointer_loss": 0.3753741383552551, "step": 2570 }, { "epoch": 0.2620744158774267, "loss_breakdown/lm_loss": 2.6839545171242207e-05, "loss_breakdown/pointer_loss": 0.3225935995578766, "step": 2570 }, { "epoch": 0.2620744158774267, "loss_breakdown/lm_loss": 2.6791552954819053e-05, "loss_breakdown/pointer_loss": 0.1865757703781128, "step": 2570 }, { "epoch": 0.2620744158774267, "loss_breakdown/lm_loss": 2.229082201665733e-05, "loss_breakdown/pointer_loss": 0.23807570338249207, "step": 2570 }, { "epoch": 0.2620744158774267, "loss_breakdown/lm_loss": 2.6975112632499076e-05, "loss_breakdown/pointer_loss": 0.2800438106060028, "step": 2570 }, { "epoch": 0.2620744158774267, "loss_breakdown/lm_loss": 1.9558357962523587e-05, "loss_breakdown/pointer_loss": 0.4910861849784851, "step": 2570 }, { "epoch": 0.2620744158774267, "loss_breakdown/lm_loss": 1.9845483620883897e-05, "loss_breakdown/pointer_loss": 0.4467753767967224, "step": 2570 }, { "epoch": 0.2620744158774267, "loss_breakdown/lm_loss": 1.5146001715038437e-05, "loss_breakdown/pointer_loss": 0.36940798163414, "step": 2570 }, { "epoch": 0.26309416068628827, "grad_norm": 10.744896467869689, "learning_rate": 4.094617563739377e-06, "loss": 0.2899, "step": 2580 }, { "epoch": 0.26309416068628827, "loss_breakdown/lm_loss": 3.797728641075082e-05, "loss_breakdown/pointer_loss": 1.7115180492401123, "step": 2580 }, { "epoch": 0.26309416068628827, "loss_breakdown/lm_loss": 2.3792481442796998e-05, "loss_breakdown/pointer_loss": 0.07937340438365936, "step": 2580 }, { "epoch": 0.26309416068628827, "loss_breakdown/lm_loss": 2.2934873413760215e-05, "loss_breakdown/pointer_loss": 0.6682505011558533, "step": 2580 }, { "epoch": 0.26309416068628827, "loss_breakdown/lm_loss": 2.5564595489413477e-05, "loss_breakdown/pointer_loss": 0.312053918838501, "step": 2580 }, { "epoch": 0.26309416068628827, "loss_breakdown/lm_loss": 2.5032575649674982e-05, "loss_breakdown/pointer_loss": 0.23970264196395874, "step": 2580 }, { "epoch": 0.26309416068628827, "loss_breakdown/lm_loss": 2.561656765465159e-05, "loss_breakdown/pointer_loss": 0.058962978422641754, "step": 2580 }, { "epoch": 0.26309416068628827, "loss_breakdown/lm_loss": 3.4468841477064416e-05, "loss_breakdown/pointer_loss": 0.18306928873062134, "step": 2580 }, { "epoch": 0.26309416068628827, "loss_breakdown/lm_loss": 2.0276558643672615e-05, "loss_breakdown/pointer_loss": 0.17720988392829895, "step": 2580 }, { "epoch": 0.26411390549514985, "grad_norm": 2.1996351320977716, "learning_rate": 4.088951841359774e-06, "loss": 0.3245, "step": 2590 }, { "epoch": 0.26411390549514985, "loss_breakdown/lm_loss": 1.861455530161038e-05, "loss_breakdown/pointer_loss": 0.32485178112983704, "step": 2590 }, { "epoch": 0.26411390549514985, "loss_breakdown/lm_loss": 1.855497066571843e-05, "loss_breakdown/pointer_loss": 0.20537135004997253, "step": 2590 }, { "epoch": 0.26411390549514985, "loss_breakdown/lm_loss": 1.4290188119048253e-05, "loss_breakdown/pointer_loss": 0.20838049054145813, "step": 2590 }, { "epoch": 0.26411390549514985, "loss_breakdown/lm_loss": 3.3361506211804226e-05, "loss_breakdown/pointer_loss": 2.017321825027466, "step": 2590 }, { "epoch": 0.26411390549514985, "loss_breakdown/lm_loss": 2.033978125837166e-05, "loss_breakdown/pointer_loss": 0.6461074948310852, "step": 2590 }, { "epoch": 0.26411390549514985, "loss_breakdown/lm_loss": 1.9237677406636067e-05, "loss_breakdown/pointer_loss": 0.4688935875892639, "step": 2590 }, { "epoch": 0.26411390549514985, "loss_breakdown/lm_loss": 1.4084450413065497e-05, "loss_breakdown/pointer_loss": 0.1521109938621521, "step": 2590 }, { "epoch": 0.26411390549514985, "loss_breakdown/lm_loss": 1.7263855625060387e-05, "loss_breakdown/pointer_loss": 0.5048814415931702, "step": 2590 }, { "epoch": 0.2651336503040114, "grad_norm": 11.373395890378209, "learning_rate": 4.083286118980171e-06, "loss": 0.2994, "step": 2600 }, { "epoch": 0.2651336503040114, "loss_breakdown/lm_loss": 0.000165250021382235, "loss_breakdown/pointer_loss": 1.364975929260254, "step": 2600 }, { "epoch": 0.2651336503040114, "loss_breakdown/lm_loss": 3.0519524443661794e-05, "loss_breakdown/pointer_loss": 0.7096906900405884, "step": 2600 }, { "epoch": 0.2651336503040114, "loss_breakdown/lm_loss": 2.172489621443674e-05, "loss_breakdown/pointer_loss": 1.667944073677063, "step": 2600 }, { "epoch": 0.2651336503040114, "loss_breakdown/lm_loss": 3.144479705952108e-05, "loss_breakdown/pointer_loss": 0.5750303268432617, "step": 2600 }, { "epoch": 0.2651336503040114, "loss_breakdown/lm_loss": 2.332023905182723e-05, "loss_breakdown/pointer_loss": 0.856249213218689, "step": 2600 }, { "epoch": 0.2651336503040114, "loss_breakdown/lm_loss": 2.788588244584389e-05, "loss_breakdown/pointer_loss": 0.381686806678772, "step": 2600 }, { "epoch": 0.2651336503040114, "loss_breakdown/lm_loss": 1.835124203353189e-05, "loss_breakdown/pointer_loss": 0.4912039041519165, "step": 2600 }, { "epoch": 0.2651336503040114, "loss_breakdown/lm_loss": 1.6691137716406956e-05, "loss_breakdown/pointer_loss": 1.0538042783737183, "step": 2600 }, { "epoch": 0.266153395112873, "grad_norm": 4.658353077157773, "learning_rate": 4.077620396600567e-06, "loss": 0.2953, "step": 2610 }, { "epoch": 0.266153395112873, "loss_breakdown/lm_loss": 1.4719557839271147e-05, "loss_breakdown/pointer_loss": 0.2197539061307907, "step": 2610 }, { "epoch": 0.266153395112873, "loss_breakdown/lm_loss": 1.663705552346073e-05, "loss_breakdown/pointer_loss": 0.551537036895752, "step": 2610 }, { "epoch": 0.266153395112873, "loss_breakdown/lm_loss": 1.9528419215930626e-05, "loss_breakdown/pointer_loss": 0.060967136174440384, "step": 2610 }, { "epoch": 0.266153395112873, "loss_breakdown/lm_loss": 1.5764266208861955e-05, "loss_breakdown/pointer_loss": 0.2790575921535492, "step": 2610 }, { "epoch": 0.266153395112873, "loss_breakdown/lm_loss": 1.9064902517129667e-05, "loss_breakdown/pointer_loss": 0.13271239399909973, "step": 2610 }, { "epoch": 0.266153395112873, "loss_breakdown/lm_loss": 2.3606360628036782e-05, "loss_breakdown/pointer_loss": 2.811084747314453, "step": 2610 }, { "epoch": 0.266153395112873, "loss_breakdown/lm_loss": 2.1742975150118582e-05, "loss_breakdown/pointer_loss": 0.38726431131362915, "step": 2610 }, { "epoch": 0.266153395112873, "loss_breakdown/lm_loss": 1.6657055311952718e-05, "loss_breakdown/pointer_loss": 0.16323311626911163, "step": 2610 }, { "epoch": 0.2671731399217346, "grad_norm": 12.79197333506465, "learning_rate": 4.071954674220964e-06, "loss": 0.3086, "step": 2620 }, { "epoch": 0.2671731399217346, "loss_breakdown/lm_loss": 1.648117722652387e-05, "loss_breakdown/pointer_loss": 0.1891867220401764, "step": 2620 }, { "epoch": 0.2671731399217346, "loss_breakdown/lm_loss": 1.9006127331522293e-05, "loss_breakdown/pointer_loss": 0.3085564970970154, "step": 2620 }, { "epoch": 0.2671731399217346, "loss_breakdown/lm_loss": 2.1172330889385194e-05, "loss_breakdown/pointer_loss": 0.4029655456542969, "step": 2620 }, { "epoch": 0.2671731399217346, "loss_breakdown/lm_loss": 1.957043605216313e-05, "loss_breakdown/pointer_loss": 0.26070353388786316, "step": 2620 }, { "epoch": 0.2671731399217346, "loss_breakdown/lm_loss": 1.544698534416966e-05, "loss_breakdown/pointer_loss": 0.5685430765151978, "step": 2620 }, { "epoch": 0.2671731399217346, "loss_breakdown/lm_loss": 2.2054186047171243e-05, "loss_breakdown/pointer_loss": 0.21202385425567627, "step": 2620 }, { "epoch": 0.2671731399217346, "loss_breakdown/lm_loss": 1.7289909010287374e-05, "loss_breakdown/pointer_loss": 0.5814509391784668, "step": 2620 }, { "epoch": 0.2671731399217346, "loss_breakdown/lm_loss": 1.8737464415607974e-05, "loss_breakdown/pointer_loss": 0.3398367762565613, "step": 2620 }, { "epoch": 0.26819288473059616, "grad_norm": 9.785679204451279, "learning_rate": 4.06628895184136e-06, "loss": 0.3036, "step": 2630 }, { "epoch": 0.26819288473059616, "loss_breakdown/lm_loss": 1.756290475896094e-05, "loss_breakdown/pointer_loss": 2.1608338356018066, "step": 2630 }, { "epoch": 0.26819288473059616, "loss_breakdown/lm_loss": 2.981943907798268e-05, "loss_breakdown/pointer_loss": 0.30799323320388794, "step": 2630 }, { "epoch": 0.26819288473059616, "loss_breakdown/lm_loss": 1.765038541634567e-05, "loss_breakdown/pointer_loss": 0.29570794105529785, "step": 2630 }, { "epoch": 0.26819288473059616, "loss_breakdown/lm_loss": 2.239423520222772e-05, "loss_breakdown/pointer_loss": 0.21768055856227875, "step": 2630 }, { "epoch": 0.26819288473059616, "loss_breakdown/lm_loss": 1.940261608979199e-05, "loss_breakdown/pointer_loss": 0.059980083256959915, "step": 2630 }, { "epoch": 0.26819288473059616, "loss_breakdown/lm_loss": 1.4674204976472538e-05, "loss_breakdown/pointer_loss": 0.3114214539527893, "step": 2630 }, { "epoch": 0.26819288473059616, "loss_breakdown/lm_loss": 2.0050267266924493e-05, "loss_breakdown/pointer_loss": 1.023423671722412, "step": 2630 }, { "epoch": 0.26819288473059616, "loss_breakdown/lm_loss": 3.14011012960691e-05, "loss_breakdown/pointer_loss": 0.13173282146453857, "step": 2630 }, { "epoch": 0.26921262953945774, "grad_norm": 2.755421519381968, "learning_rate": 4.060623229461757e-06, "loss": 0.3091, "step": 2640 }, { "epoch": 0.26921262953945774, "loss_breakdown/lm_loss": 1.6495450836373493e-05, "loss_breakdown/pointer_loss": 0.43474799394607544, "step": 2640 }, { "epoch": 0.26921262953945774, "loss_breakdown/lm_loss": 1.7086016669054516e-05, "loss_breakdown/pointer_loss": 0.22260257601737976, "step": 2640 }, { "epoch": 0.26921262953945774, "loss_breakdown/lm_loss": 1.5682184312026948e-05, "loss_breakdown/pointer_loss": 0.31397807598114014, "step": 2640 }, { "epoch": 0.26921262953945774, "loss_breakdown/lm_loss": 1.5027896552055608e-05, "loss_breakdown/pointer_loss": 0.32645559310913086, "step": 2640 }, { "epoch": 0.26921262953945774, "loss_breakdown/lm_loss": 1.9038192476728e-05, "loss_breakdown/pointer_loss": 0.053734004497528076, "step": 2640 }, { "epoch": 0.26921262953945774, "loss_breakdown/lm_loss": 1.3248809409560636e-05, "loss_breakdown/pointer_loss": 0.14846253395080566, "step": 2640 }, { "epoch": 0.26921262953945774, "loss_breakdown/lm_loss": 1.674175109656062e-05, "loss_breakdown/pointer_loss": 0.27503079175949097, "step": 2640 }, { "epoch": 0.26921262953945774, "loss_breakdown/lm_loss": 1.7489041056251153e-05, "loss_breakdown/pointer_loss": 0.1326475441455841, "step": 2640 }, { "epoch": 0.2702323743483193, "grad_norm": 8.150194383467054, "learning_rate": 4.054957507082153e-06, "loss": 0.2896, "step": 2650 }, { "epoch": 0.2702323743483193, "loss_breakdown/lm_loss": 8.232882828451693e-05, "loss_breakdown/pointer_loss": 1.3910621404647827, "step": 2650 }, { "epoch": 0.2702323743483193, "loss_breakdown/lm_loss": 4.038731276523322e-05, "loss_breakdown/pointer_loss": 2.044741153717041, "step": 2650 }, { "epoch": 0.2702323743483193, "loss_breakdown/lm_loss": 4.794895721715875e-05, "loss_breakdown/pointer_loss": 1.5250380039215088, "step": 2650 }, { "epoch": 0.2702323743483193, "loss_breakdown/lm_loss": 4.669592817663215e-05, "loss_breakdown/pointer_loss": 0.48935574293136597, "step": 2650 }, { "epoch": 0.2702323743483193, "loss_breakdown/lm_loss": 2.2994661776465364e-05, "loss_breakdown/pointer_loss": 1.9501694440841675, "step": 2650 }, { "epoch": 0.2702323743483193, "loss_breakdown/lm_loss": 1.7377642507199198e-05, "loss_breakdown/pointer_loss": 0.4129220247268677, "step": 2650 }, { "epoch": 0.2702323743483193, "loss_breakdown/lm_loss": 2.8889464374515228e-05, "loss_breakdown/pointer_loss": 1.319312334060669, "step": 2650 }, { "epoch": 0.2702323743483193, "loss_breakdown/lm_loss": 1.580288153490983e-05, "loss_breakdown/pointer_loss": 0.34235283732414246, "step": 2650 }, { "epoch": 0.2712521191571809, "grad_norm": 4.011291298951231, "learning_rate": 4.04929178470255e-06, "loss": 0.2916, "step": 2660 }, { "epoch": 0.2712521191571809, "loss_breakdown/lm_loss": 3.822013968601823e-05, "loss_breakdown/pointer_loss": 0.2975747585296631, "step": 2660 }, { "epoch": 0.2712521191571809, "loss_breakdown/lm_loss": 1.3698675502382684e-05, "loss_breakdown/pointer_loss": 0.17628361284732819, "step": 2660 }, { "epoch": 0.2712521191571809, "loss_breakdown/lm_loss": 1.9282097127870657e-05, "loss_breakdown/pointer_loss": 0.8408188819885254, "step": 2660 }, { "epoch": 0.2712521191571809, "loss_breakdown/lm_loss": 1.950473597389646e-05, "loss_breakdown/pointer_loss": 0.2716163396835327, "step": 2660 }, { "epoch": 0.2712521191571809, "loss_breakdown/lm_loss": 2.2380798327503726e-05, "loss_breakdown/pointer_loss": 0.11992816627025604, "step": 2660 }, { "epoch": 0.2712521191571809, "loss_breakdown/lm_loss": 2.6657715352484956e-05, "loss_breakdown/pointer_loss": 0.20108816027641296, "step": 2660 }, { "epoch": 0.2712521191571809, "loss_breakdown/lm_loss": 1.7268850569962524e-05, "loss_breakdown/pointer_loss": 0.09567388892173767, "step": 2660 }, { "epoch": 0.2712521191571809, "loss_breakdown/lm_loss": 1.9716515453183092e-05, "loss_breakdown/pointer_loss": 0.3474190831184387, "step": 2660 }, { "epoch": 0.27227186396604247, "grad_norm": 2.2877211321578743, "learning_rate": 4.043626062322947e-06, "loss": 0.323, "step": 2670 }, { "epoch": 0.27227186396604247, "loss_breakdown/lm_loss": 2.832250902429223e-05, "loss_breakdown/pointer_loss": 0.44851046800613403, "step": 2670 }, { "epoch": 0.27227186396604247, "loss_breakdown/lm_loss": 2.574628706497606e-05, "loss_breakdown/pointer_loss": 1.7256519794464111, "step": 2670 }, { "epoch": 0.27227186396604247, "loss_breakdown/lm_loss": 1.867975151981227e-05, "loss_breakdown/pointer_loss": 0.2580944299697876, "step": 2670 }, { "epoch": 0.27227186396604247, "loss_breakdown/lm_loss": 1.673465703788679e-05, "loss_breakdown/pointer_loss": 0.3320351839065552, "step": 2670 }, { "epoch": 0.27227186396604247, "loss_breakdown/lm_loss": 1.960903318831697e-05, "loss_breakdown/pointer_loss": 0.6359220147132874, "step": 2670 }, { "epoch": 0.27227186396604247, "loss_breakdown/lm_loss": 1.5066450032463763e-05, "loss_breakdown/pointer_loss": 0.21038472652435303, "step": 2670 }, { "epoch": 0.27227186396604247, "loss_breakdown/lm_loss": 2.3301579858525656e-05, "loss_breakdown/pointer_loss": 0.23456192016601562, "step": 2670 }, { "epoch": 0.27227186396604247, "loss_breakdown/lm_loss": 2.4834223950165324e-05, "loss_breakdown/pointer_loss": 0.3629222512245178, "step": 2670 }, { "epoch": 0.2732916087749041, "grad_norm": 4.4284761254102065, "learning_rate": 4.0379603399433435e-06, "loss": 0.3203, "step": 2680 }, { "epoch": 0.2732916087749041, "loss_breakdown/lm_loss": 2.06977674679365e-05, "loss_breakdown/pointer_loss": 0.0981961190700531, "step": 2680 }, { "epoch": 0.2732916087749041, "loss_breakdown/lm_loss": 3.335050860187039e-05, "loss_breakdown/pointer_loss": 4.661577224731445, "step": 2680 }, { "epoch": 0.2732916087749041, "loss_breakdown/lm_loss": 1.9998493371531367e-05, "loss_breakdown/pointer_loss": 0.10190683603286743, "step": 2680 }, { "epoch": 0.2732916087749041, "loss_breakdown/lm_loss": 2.7841224436997436e-05, "loss_breakdown/pointer_loss": 0.09047291427850723, "step": 2680 }, { "epoch": 0.2732916087749041, "loss_breakdown/lm_loss": 2.311747994099278e-05, "loss_breakdown/pointer_loss": 0.23432135581970215, "step": 2680 }, { "epoch": 0.2732916087749041, "loss_breakdown/lm_loss": 2.26447700697463e-05, "loss_breakdown/pointer_loss": 0.6263689994812012, "step": 2680 }, { "epoch": 0.2732916087749041, "loss_breakdown/lm_loss": 2.3733202397124842e-05, "loss_breakdown/pointer_loss": 0.2845093309879303, "step": 2680 }, { "epoch": 0.2732916087749041, "loss_breakdown/lm_loss": 2.3407501430483535e-05, "loss_breakdown/pointer_loss": 2.7974724769592285, "step": 2680 }, { "epoch": 0.2743113535837657, "grad_norm": 3.9314427634922526, "learning_rate": 4.032294617563739e-06, "loss": 0.3372, "step": 2690 }, { "epoch": 0.2743113535837657, "loss_breakdown/lm_loss": 1.55403686221689e-05, "loss_breakdown/pointer_loss": 0.26430678367614746, "step": 2690 }, { "epoch": 0.2743113535837657, "loss_breakdown/lm_loss": 1.887309736048337e-05, "loss_breakdown/pointer_loss": 0.21046186983585358, "step": 2690 }, { "epoch": 0.2743113535837657, "loss_breakdown/lm_loss": 1.8798365999828093e-05, "loss_breakdown/pointer_loss": 0.2356092482805252, "step": 2690 }, { "epoch": 0.2743113535837657, "loss_breakdown/lm_loss": 1.6644446077407338e-05, "loss_breakdown/pointer_loss": 0.5172469019889832, "step": 2690 }, { "epoch": 0.2743113535837657, "loss_breakdown/lm_loss": 1.578174669703003e-05, "loss_breakdown/pointer_loss": 0.18552234768867493, "step": 2690 }, { "epoch": 0.2743113535837657, "loss_breakdown/lm_loss": 1.568235529703088e-05, "loss_breakdown/pointer_loss": 0.36569106578826904, "step": 2690 }, { "epoch": 0.2743113535837657, "loss_breakdown/lm_loss": 1.7975973605643958e-05, "loss_breakdown/pointer_loss": 0.13662222027778625, "step": 2690 }, { "epoch": 0.2743113535837657, "loss_breakdown/lm_loss": 1.5623834769940004e-05, "loss_breakdown/pointer_loss": 0.29238107800483704, "step": 2690 }, { "epoch": 0.27533109839262726, "grad_norm": 12.29985112874742, "learning_rate": 4.0266288951841365e-06, "loss": 0.2785, "step": 2700 }, { "epoch": 0.27533109839262726, "loss_breakdown/lm_loss": 9.55282521317713e-05, "loss_breakdown/pointer_loss": 2.2213222980499268, "step": 2700 }, { "epoch": 0.27533109839262726, "loss_breakdown/lm_loss": 3.967537122662179e-05, "loss_breakdown/pointer_loss": 0.5645738840103149, "step": 2700 }, { "epoch": 0.27533109839262726, "loss_breakdown/lm_loss": 3.763499989872798e-05, "loss_breakdown/pointer_loss": 0.4041786789894104, "step": 2700 }, { "epoch": 0.27533109839262726, "loss_breakdown/lm_loss": 3.02237367577618e-05, "loss_breakdown/pointer_loss": 0.443379670381546, "step": 2700 }, { "epoch": 0.27533109839262726, "loss_breakdown/lm_loss": 2.7824942662846297e-05, "loss_breakdown/pointer_loss": 0.8630914688110352, "step": 2700 }, { "epoch": 0.27533109839262726, "loss_breakdown/lm_loss": 4.573183105094358e-05, "loss_breakdown/pointer_loss": 1.1392474174499512, "step": 2700 }, { "epoch": 0.27533109839262726, "loss_breakdown/lm_loss": 1.8719129002420232e-05, "loss_breakdown/pointer_loss": 0.5896714329719543, "step": 2700 }, { "epoch": 0.27533109839262726, "loss_breakdown/lm_loss": 2.1412144633359276e-05, "loss_breakdown/pointer_loss": 0.35811543464660645, "step": 2700 }, { "epoch": 0.27635084320148884, "grad_norm": 3.1548550731900975, "learning_rate": 4.020963172804533e-06, "loss": 0.2933, "step": 2710 }, { "epoch": 0.27635084320148884, "loss_breakdown/lm_loss": 1.718258499749936e-05, "loss_breakdown/pointer_loss": 0.2903551459312439, "step": 2710 }, { "epoch": 0.27635084320148884, "loss_breakdown/lm_loss": 1.561989847687073e-05, "loss_breakdown/pointer_loss": 0.12896859645843506, "step": 2710 }, { "epoch": 0.27635084320148884, "loss_breakdown/lm_loss": 1.5178903595369775e-05, "loss_breakdown/pointer_loss": 0.14574432373046875, "step": 2710 }, { "epoch": 0.27635084320148884, "loss_breakdown/lm_loss": 2.177473288611509e-05, "loss_breakdown/pointer_loss": 0.6473047733306885, "step": 2710 }, { "epoch": 0.27635084320148884, "loss_breakdown/lm_loss": 1.5212649486784358e-05, "loss_breakdown/pointer_loss": 0.20888668298721313, "step": 2710 }, { "epoch": 0.27635084320148884, "loss_breakdown/lm_loss": 2.2676802473142743e-05, "loss_breakdown/pointer_loss": 0.6899890899658203, "step": 2710 }, { "epoch": 0.27635084320148884, "loss_breakdown/lm_loss": 1.691132456471678e-05, "loss_breakdown/pointer_loss": 0.1178366020321846, "step": 2710 }, { "epoch": 0.27635084320148884, "loss_breakdown/lm_loss": 1.8097309293807484e-05, "loss_breakdown/pointer_loss": 0.36934512853622437, "step": 2710 }, { "epoch": 0.2773705880103504, "grad_norm": 6.976213836365117, "learning_rate": 4.0152974504249295e-06, "loss": 0.3365, "step": 2720 }, { "epoch": 0.2773705880103504, "loss_breakdown/lm_loss": 1.881837124528829e-05, "loss_breakdown/pointer_loss": 1.0609105825424194, "step": 2720 }, { "epoch": 0.2773705880103504, "loss_breakdown/lm_loss": 1.834584145399276e-05, "loss_breakdown/pointer_loss": 0.9404895305633545, "step": 2720 }, { "epoch": 0.2773705880103504, "loss_breakdown/lm_loss": 1.5993553461157717e-05, "loss_breakdown/pointer_loss": 0.2810651361942291, "step": 2720 }, { "epoch": 0.2773705880103504, "loss_breakdown/lm_loss": 1.786137727322057e-05, "loss_breakdown/pointer_loss": 1.1204497814178467, "step": 2720 }, { "epoch": 0.2773705880103504, "loss_breakdown/lm_loss": 1.7223133909283206e-05, "loss_breakdown/pointer_loss": 0.649703860282898, "step": 2720 }, { "epoch": 0.2773705880103504, "loss_breakdown/lm_loss": 1.7707576262182556e-05, "loss_breakdown/pointer_loss": 0.30416885018348694, "step": 2720 }, { "epoch": 0.2773705880103504, "loss_breakdown/lm_loss": 1.891006832011044e-05, "loss_breakdown/pointer_loss": 0.4913507103919983, "step": 2720 }, { "epoch": 0.2773705880103504, "loss_breakdown/lm_loss": 1.4738348909304477e-05, "loss_breakdown/pointer_loss": 1.7411785125732422, "step": 2720 }, { "epoch": 0.278390332819212, "grad_norm": 25.96856494614865, "learning_rate": 4.009631728045326e-06, "loss": 0.2735, "step": 2730 }, { "epoch": 0.278390332819212, "loss_breakdown/lm_loss": 1.5786790754646063e-05, "loss_breakdown/pointer_loss": 0.198554128408432, "step": 2730 }, { "epoch": 0.278390332819212, "loss_breakdown/lm_loss": 1.7590764400665648e-05, "loss_breakdown/pointer_loss": 2.3488388061523438, "step": 2730 }, { "epoch": 0.278390332819212, "loss_breakdown/lm_loss": 1.6839812815305777e-05, "loss_breakdown/pointer_loss": 0.12361957877874374, "step": 2730 }, { "epoch": 0.278390332819212, "loss_breakdown/lm_loss": 1.428487212251639e-05, "loss_breakdown/pointer_loss": 0.06342288851737976, "step": 2730 }, { "epoch": 0.278390332819212, "loss_breakdown/lm_loss": 1.7682121324469335e-05, "loss_breakdown/pointer_loss": 0.10697579383850098, "step": 2730 }, { "epoch": 0.278390332819212, "loss_breakdown/lm_loss": 2.1353442207328044e-05, "loss_breakdown/pointer_loss": 0.5768455266952515, "step": 2730 }, { "epoch": 0.278390332819212, "loss_breakdown/lm_loss": 2.4623524950584397e-05, "loss_breakdown/pointer_loss": 0.19261544942855835, "step": 2730 }, { "epoch": 0.278390332819212, "loss_breakdown/lm_loss": 2.2545189494849183e-05, "loss_breakdown/pointer_loss": 5.489098072052002, "step": 2730 }, { "epoch": 0.27941007762807357, "grad_norm": 2.1621560848681876, "learning_rate": 4.0039660056657225e-06, "loss": 0.3248, "step": 2740 }, { "epoch": 0.27941007762807357, "loss_breakdown/lm_loss": 1.2326515388849657e-05, "loss_breakdown/pointer_loss": 0.314316987991333, "step": 2740 }, { "epoch": 0.27941007762807357, "loss_breakdown/lm_loss": 1.5962557881721295e-05, "loss_breakdown/pointer_loss": 0.396426260471344, "step": 2740 }, { "epoch": 0.27941007762807357, "loss_breakdown/lm_loss": 1.3374442460190039e-05, "loss_breakdown/pointer_loss": 0.8026024103164673, "step": 2740 }, { "epoch": 0.27941007762807357, "loss_breakdown/lm_loss": 1.5725770936114714e-05, "loss_breakdown/pointer_loss": 0.046289220452308655, "step": 2740 }, { "epoch": 0.27941007762807357, "loss_breakdown/lm_loss": 4.4012878788635135e-05, "loss_breakdown/pointer_loss": 1.3191275596618652, "step": 2740 }, { "epoch": 0.27941007762807357, "loss_breakdown/lm_loss": 1.602919292054139e-05, "loss_breakdown/pointer_loss": 0.15583215653896332, "step": 2740 }, { "epoch": 0.27941007762807357, "loss_breakdown/lm_loss": 1.420277294528205e-05, "loss_breakdown/pointer_loss": 0.2520408630371094, "step": 2740 }, { "epoch": 0.27941007762807357, "loss_breakdown/lm_loss": 1.5672898371121846e-05, "loss_breakdown/pointer_loss": 0.7705196738243103, "step": 2740 }, { "epoch": 0.28042982243693515, "grad_norm": 6.950588278332541, "learning_rate": 3.99830028328612e-06, "loss": 0.3001, "step": 2750 }, { "epoch": 0.28042982243693515, "loss_breakdown/lm_loss": 9.18694058782421e-05, "loss_breakdown/pointer_loss": 0.7771829962730408, "step": 2750 }, { "epoch": 0.28042982243693515, "loss_breakdown/lm_loss": 4.1335213609272614e-05, "loss_breakdown/pointer_loss": 2.1887941360473633, "step": 2750 }, { "epoch": 0.28042982243693515, "loss_breakdown/lm_loss": 2.8560923965414986e-05, "loss_breakdown/pointer_loss": 0.8174663782119751, "step": 2750 }, { "epoch": 0.28042982243693515, "loss_breakdown/lm_loss": 2.0685056369984522e-05, "loss_breakdown/pointer_loss": 0.37189245223999023, "step": 2750 }, { "epoch": 0.28042982243693515, "loss_breakdown/lm_loss": 2.7295835025142878e-05, "loss_breakdown/pointer_loss": 0.978448748588562, "step": 2750 }, { "epoch": 0.28042982243693515, "loss_breakdown/lm_loss": 2.4193006538553163e-05, "loss_breakdown/pointer_loss": 0.4092097878456116, "step": 2750 }, { "epoch": 0.28042982243693515, "loss_breakdown/lm_loss": 1.8040178474620916e-05, "loss_breakdown/pointer_loss": 0.8064420223236084, "step": 2750 }, { "epoch": 0.28042982243693515, "loss_breakdown/lm_loss": 2.0801568098249845e-05, "loss_breakdown/pointer_loss": 0.8862636685371399, "step": 2750 }, { "epoch": 0.28144956724579673, "grad_norm": 15.086505104433844, "learning_rate": 3.9926345609065155e-06, "loss": 0.2909, "step": 2760 }, { "epoch": 0.28144956724579673, "loss_breakdown/lm_loss": 1.3374964510148857e-05, "loss_breakdown/pointer_loss": 0.17989404499530792, "step": 2760 }, { "epoch": 0.28144956724579673, "loss_breakdown/lm_loss": 1.2745847016049083e-05, "loss_breakdown/pointer_loss": 0.3047564923763275, "step": 2760 }, { "epoch": 0.28144956724579673, "loss_breakdown/lm_loss": 1.54252029460622e-05, "loss_breakdown/pointer_loss": 0.1065337210893631, "step": 2760 }, { "epoch": 0.28144956724579673, "loss_breakdown/lm_loss": 1.8254278984386474e-05, "loss_breakdown/pointer_loss": 3.0314836502075195, "step": 2760 }, { "epoch": 0.28144956724579673, "loss_breakdown/lm_loss": 2.0332468920969404e-05, "loss_breakdown/pointer_loss": 0.14414867758750916, "step": 2760 }, { "epoch": 0.28144956724579673, "loss_breakdown/lm_loss": 2.239852437924128e-05, "loss_breakdown/pointer_loss": 0.7725878953933716, "step": 2760 }, { "epoch": 0.28144956724579673, "loss_breakdown/lm_loss": 1.8401469787932e-05, "loss_breakdown/pointer_loss": 0.19908452033996582, "step": 2760 }, { "epoch": 0.28144956724579673, "loss_breakdown/lm_loss": 2.0094046703889035e-05, "loss_breakdown/pointer_loss": 0.22667084634304047, "step": 2760 }, { "epoch": 0.2824693120546583, "grad_norm": 2.203567970908618, "learning_rate": 3.986968838526912e-06, "loss": 0.2835, "step": 2770 }, { "epoch": 0.2824693120546583, "loss_breakdown/lm_loss": 1.9883860659319907e-05, "loss_breakdown/pointer_loss": 0.25165724754333496, "step": 2770 }, { "epoch": 0.2824693120546583, "loss_breakdown/lm_loss": 1.447263639420271e-05, "loss_breakdown/pointer_loss": 0.7999439239501953, "step": 2770 }, { "epoch": 0.2824693120546583, "loss_breakdown/lm_loss": 1.5242083463817835e-05, "loss_breakdown/pointer_loss": 0.2872806191444397, "step": 2770 }, { "epoch": 0.2824693120546583, "loss_breakdown/lm_loss": 1.8221682694274932e-05, "loss_breakdown/pointer_loss": 0.5223303437232971, "step": 2770 }, { "epoch": 0.2824693120546583, "loss_breakdown/lm_loss": 1.8924685718957335e-05, "loss_breakdown/pointer_loss": 0.8128043413162231, "step": 2770 }, { "epoch": 0.2824693120546583, "loss_breakdown/lm_loss": 2.0178196791675873e-05, "loss_breakdown/pointer_loss": 0.5214812159538269, "step": 2770 }, { "epoch": 0.2824693120546583, "loss_breakdown/lm_loss": 1.8567661754786968e-05, "loss_breakdown/pointer_loss": 0.5571932792663574, "step": 2770 }, { "epoch": 0.2824693120546583, "loss_breakdown/lm_loss": 1.9989351130789146e-05, "loss_breakdown/pointer_loss": 0.17264997959136963, "step": 2770 }, { "epoch": 0.2834890568635199, "grad_norm": 4.584348374734272, "learning_rate": 3.981303116147309e-06, "loss": 0.2635, "step": 2780 }, { "epoch": 0.2834890568635199, "loss_breakdown/lm_loss": 1.8047732737613842e-05, "loss_breakdown/pointer_loss": 0.1635356843471527, "step": 2780 }, { "epoch": 0.2834890568635199, "loss_breakdown/lm_loss": 1.640274240344297e-05, "loss_breakdown/pointer_loss": 0.1342364102602005, "step": 2780 }, { "epoch": 0.2834890568635199, "loss_breakdown/lm_loss": 1.7988115359912626e-05, "loss_breakdown/pointer_loss": 0.3972931504249573, "step": 2780 }, { "epoch": 0.2834890568635199, "loss_breakdown/lm_loss": 1.558947224111762e-05, "loss_breakdown/pointer_loss": 0.1223893091082573, "step": 2780 }, { "epoch": 0.2834890568635199, "loss_breakdown/lm_loss": 1.549680564494338e-05, "loss_breakdown/pointer_loss": 0.04638974368572235, "step": 2780 }, { "epoch": 0.2834890568635199, "loss_breakdown/lm_loss": 1.973646976693999e-05, "loss_breakdown/pointer_loss": 1.5697437524795532, "step": 2780 }, { "epoch": 0.2834890568635199, "loss_breakdown/lm_loss": 1.8588219973025844e-05, "loss_breakdown/pointer_loss": 2.3439011573791504, "step": 2780 }, { "epoch": 0.2834890568635199, "loss_breakdown/lm_loss": 2.2307318431558087e-05, "loss_breakdown/pointer_loss": 0.37659692764282227, "step": 2780 }, { "epoch": 0.28450880167238146, "grad_norm": 3.936247236161045, "learning_rate": 3.975637393767706e-06, "loss": 0.3396, "step": 2790 }, { "epoch": 0.28450880167238146, "loss_breakdown/lm_loss": 1.9653751223813742e-05, "loss_breakdown/pointer_loss": 0.6953035593032837, "step": 2790 }, { "epoch": 0.28450880167238146, "loss_breakdown/lm_loss": 1.9565042748581618e-05, "loss_breakdown/pointer_loss": 0.33902353048324585, "step": 2790 }, { "epoch": 0.28450880167238146, "loss_breakdown/lm_loss": 1.3855775250704028e-05, "loss_breakdown/pointer_loss": 0.21544073522090912, "step": 2790 }, { "epoch": 0.28450880167238146, "loss_breakdown/lm_loss": 2.078775105474051e-05, "loss_breakdown/pointer_loss": 0.6244826912879944, "step": 2790 }, { "epoch": 0.28450880167238146, "loss_breakdown/lm_loss": 1.5448165868292563e-05, "loss_breakdown/pointer_loss": 0.421382337808609, "step": 2790 }, { "epoch": 0.28450880167238146, "loss_breakdown/lm_loss": 1.2241208423802163e-05, "loss_breakdown/pointer_loss": 0.16363802552223206, "step": 2790 }, { "epoch": 0.28450880167238146, "loss_breakdown/lm_loss": 1.4057059161132202e-05, "loss_breakdown/pointer_loss": 0.2342863827943802, "step": 2790 }, { "epoch": 0.28450880167238146, "loss_breakdown/lm_loss": 1.8672541045816615e-05, "loss_breakdown/pointer_loss": 1.0026068687438965, "step": 2790 }, { "epoch": 0.2855285464812431, "grad_norm": 22.36327798520366, "learning_rate": 3.969971671388102e-06, "loss": 0.2954, "step": 2800 }, { "epoch": 0.2855285464812431, "loss_breakdown/lm_loss": 6.572130223503336e-05, "loss_breakdown/pointer_loss": 1.3663215637207031, "step": 2800 }, { "epoch": 0.2855285464812431, "loss_breakdown/lm_loss": 2.8040079996571876e-05, "loss_breakdown/pointer_loss": 0.7135162353515625, "step": 2800 }, { "epoch": 0.2855285464812431, "loss_breakdown/lm_loss": 3.159284096909687e-05, "loss_breakdown/pointer_loss": 0.5488244891166687, "step": 2800 }, { "epoch": 0.2855285464812431, "loss_breakdown/lm_loss": 2.8275058866711333e-05, "loss_breakdown/pointer_loss": 0.5481417775154114, "step": 2800 }, { "epoch": 0.2855285464812431, "loss_breakdown/lm_loss": 2.0858205971308053e-05, "loss_breakdown/pointer_loss": 0.47511768341064453, "step": 2800 }, { "epoch": 0.2855285464812431, "loss_breakdown/lm_loss": 2.0330870029283687e-05, "loss_breakdown/pointer_loss": 0.16757318377494812, "step": 2800 }, { "epoch": 0.2855285464812431, "loss_breakdown/lm_loss": 0.0001465710229240358, "loss_breakdown/pointer_loss": 1.278182029724121, "step": 2800 }, { "epoch": 0.2855285464812431, "loss_breakdown/lm_loss": 3.01710533676669e-05, "loss_breakdown/pointer_loss": 0.4783661961555481, "step": 2800 }, { "epoch": 0.2865482912901047, "grad_norm": 8.018377659552675, "learning_rate": 3.964305949008499e-06, "loss": 0.2807, "step": 2810 }, { "epoch": 0.2865482912901047, "loss_breakdown/lm_loss": 1.1123312106064986e-05, "loss_breakdown/pointer_loss": 0.05882256478071213, "step": 2810 }, { "epoch": 0.2865482912901047, "loss_breakdown/lm_loss": 1.3792136996926274e-05, "loss_breakdown/pointer_loss": 0.29098451137542725, "step": 2810 }, { "epoch": 0.2865482912901047, "loss_breakdown/lm_loss": 1.3434586435323581e-05, "loss_breakdown/pointer_loss": 0.1255677193403244, "step": 2810 }, { "epoch": 0.2865482912901047, "loss_breakdown/lm_loss": 1.764359330991283e-05, "loss_breakdown/pointer_loss": 0.18354623019695282, "step": 2810 }, { "epoch": 0.2865482912901047, "loss_breakdown/lm_loss": 1.4984167137299664e-05, "loss_breakdown/pointer_loss": 2.0918898582458496, "step": 2810 }, { "epoch": 0.2865482912901047, "loss_breakdown/lm_loss": 1.6739564671297558e-05, "loss_breakdown/pointer_loss": 0.13399562239646912, "step": 2810 }, { "epoch": 0.2865482912901047, "loss_breakdown/lm_loss": 2.4722849047975615e-05, "loss_breakdown/pointer_loss": 1.9068702459335327, "step": 2810 }, { "epoch": 0.2865482912901047, "loss_breakdown/lm_loss": 1.5305950000765733e-05, "loss_breakdown/pointer_loss": 0.046690456569194794, "step": 2810 }, { "epoch": 0.28756803609896625, "grad_norm": 2.5032042355558115, "learning_rate": 3.958640226628895e-06, "loss": 0.3028, "step": 2820 }, { "epoch": 0.28756803609896625, "loss_breakdown/lm_loss": 1.5852958313189447e-05, "loss_breakdown/pointer_loss": 0.40546339750289917, "step": 2820 }, { "epoch": 0.28756803609896625, "loss_breakdown/lm_loss": 1.6754132957430556e-05, "loss_breakdown/pointer_loss": 1.154778242111206, "step": 2820 }, { "epoch": 0.28756803609896625, "loss_breakdown/lm_loss": 1.8294569599675015e-05, "loss_breakdown/pointer_loss": 1.443705439567566, "step": 2820 }, { "epoch": 0.28756803609896625, "loss_breakdown/lm_loss": 1.2759328456013463e-05, "loss_breakdown/pointer_loss": 0.4275585412979126, "step": 2820 }, { "epoch": 0.28756803609896625, "loss_breakdown/lm_loss": 1.2236826478329021e-05, "loss_breakdown/pointer_loss": 0.28658872842788696, "step": 2820 }, { "epoch": 0.28756803609896625, "loss_breakdown/lm_loss": 1.3276935533212963e-05, "loss_breakdown/pointer_loss": 0.18443700671195984, "step": 2820 }, { "epoch": 0.28756803609896625, "loss_breakdown/lm_loss": 1.556305869598873e-05, "loss_breakdown/pointer_loss": 1.2347140312194824, "step": 2820 }, { "epoch": 0.28756803609896625, "loss_breakdown/lm_loss": 1.2340708963165525e-05, "loss_breakdown/pointer_loss": 0.3541794419288635, "step": 2820 }, { "epoch": 0.28858778090782783, "grad_norm": 11.107614202427175, "learning_rate": 3.952974504249292e-06, "loss": 0.3077, "step": 2830 }, { "epoch": 0.28858778090782783, "loss_breakdown/lm_loss": 1.4853095308353659e-05, "loss_breakdown/pointer_loss": 0.8571216464042664, "step": 2830 }, { "epoch": 0.28858778090782783, "loss_breakdown/lm_loss": 1.1817381164291874e-05, "loss_breakdown/pointer_loss": 0.1886330097913742, "step": 2830 }, { "epoch": 0.28858778090782783, "loss_breakdown/lm_loss": 1.241736208612565e-05, "loss_breakdown/pointer_loss": 0.19286498427391052, "step": 2830 }, { "epoch": 0.28858778090782783, "loss_breakdown/lm_loss": 1.6017138477764092e-05, "loss_breakdown/pointer_loss": 0.5197542309761047, "step": 2830 }, { "epoch": 0.28858778090782783, "loss_breakdown/lm_loss": 1.7682188627077267e-05, "loss_breakdown/pointer_loss": 0.11175251007080078, "step": 2830 }, { "epoch": 0.28858778090782783, "loss_breakdown/lm_loss": 1.5707386410213076e-05, "loss_breakdown/pointer_loss": 0.24760079383850098, "step": 2830 }, { "epoch": 0.28858778090782783, "loss_breakdown/lm_loss": 2.1162468328839168e-05, "loss_breakdown/pointer_loss": 0.4330415725708008, "step": 2830 }, { "epoch": 0.28858778090782783, "loss_breakdown/lm_loss": 1.711387994873803e-05, "loss_breakdown/pointer_loss": 0.10303033888339996, "step": 2830 }, { "epoch": 0.2896075257166894, "grad_norm": 7.590063088267811, "learning_rate": 3.947308781869688e-06, "loss": 0.3174, "step": 2840 }, { "epoch": 0.2896075257166894, "loss_breakdown/lm_loss": 1.4705650755786337e-05, "loss_breakdown/pointer_loss": 0.3575945794582367, "step": 2840 }, { "epoch": 0.2896075257166894, "loss_breakdown/lm_loss": 1.4268207451095805e-05, "loss_breakdown/pointer_loss": 0.46114304661750793, "step": 2840 }, { "epoch": 0.2896075257166894, "loss_breakdown/lm_loss": 1.2610666090040468e-05, "loss_breakdown/pointer_loss": 0.45820412039756775, "step": 2840 }, { "epoch": 0.2896075257166894, "loss_breakdown/lm_loss": 2.533151928219013e-05, "loss_breakdown/pointer_loss": 0.13653257489204407, "step": 2840 }, { "epoch": 0.2896075257166894, "loss_breakdown/lm_loss": 1.347205943602603e-05, "loss_breakdown/pointer_loss": 0.5371060967445374, "step": 2840 }, { "epoch": 0.2896075257166894, "loss_breakdown/lm_loss": 1.4473926057689823e-05, "loss_breakdown/pointer_loss": 0.39191290736198425, "step": 2840 }, { "epoch": 0.2896075257166894, "loss_breakdown/lm_loss": 1.7655913325143047e-05, "loss_breakdown/pointer_loss": 0.21358083188533783, "step": 2840 }, { "epoch": 0.2896075257166894, "loss_breakdown/lm_loss": 1.0180267963733058e-05, "loss_breakdown/pointer_loss": 0.25312989950180054, "step": 2840 }, { "epoch": 0.290627270525551, "grad_norm": 56.88416600386948, "learning_rate": 3.941643059490085e-06, "loss": 0.3299, "step": 2850 }, { "epoch": 0.290627270525551, "loss_breakdown/lm_loss": 0.0003644067619461566, "loss_breakdown/pointer_loss": 1.9689984321594238, "step": 2850 }, { "epoch": 0.290627270525551, "loss_breakdown/lm_loss": 3.34325413859915e-05, "loss_breakdown/pointer_loss": 1.5174559354782104, "step": 2850 }, { "epoch": 0.290627270525551, "loss_breakdown/lm_loss": 3.33583120664116e-05, "loss_breakdown/pointer_loss": 1.88478684425354, "step": 2850 }, { "epoch": 0.290627270525551, "loss_breakdown/lm_loss": 0.0031289237085729837, "loss_breakdown/pointer_loss": 0.7395635843276978, "step": 2850 }, { "epoch": 0.290627270525551, "loss_breakdown/lm_loss": 2.1153839043108746e-05, "loss_breakdown/pointer_loss": 0.6895118355751038, "step": 2850 }, { "epoch": 0.290627270525551, "loss_breakdown/lm_loss": 2.2557425836566836e-05, "loss_breakdown/pointer_loss": 0.4220704436302185, "step": 2850 }, { "epoch": 0.290627270525551, "loss_breakdown/lm_loss": 1.654198786127381e-05, "loss_breakdown/pointer_loss": 0.41880184412002563, "step": 2850 }, { "epoch": 0.290627270525551, "loss_breakdown/lm_loss": 1.6860371033544652e-05, "loss_breakdown/pointer_loss": 0.561532199382782, "step": 2850 }, { "epoch": 0.29164701533441256, "grad_norm": 18.618065011850792, "learning_rate": 3.935977337110482e-06, "loss": 0.2856, "step": 2860 }, { "epoch": 0.29164701533441256, "loss_breakdown/lm_loss": 1.75138429767685e-05, "loss_breakdown/pointer_loss": 0.17111161351203918, "step": 2860 }, { "epoch": 0.29164701533441256, "loss_breakdown/lm_loss": 3.0547082133125514e-05, "loss_breakdown/pointer_loss": 0.37409621477127075, "step": 2860 }, { "epoch": 0.29164701533441256, "loss_breakdown/lm_loss": 1.684498602116946e-05, "loss_breakdown/pointer_loss": 0.1937020719051361, "step": 2860 }, { "epoch": 0.29164701533441256, "loss_breakdown/lm_loss": 2.3440825316356495e-05, "loss_breakdown/pointer_loss": 0.27456772327423096, "step": 2860 }, { "epoch": 0.29164701533441256, "loss_breakdown/lm_loss": 0.0001249728666152805, "loss_breakdown/pointer_loss": 0.19013479351997375, "step": 2860 }, { "epoch": 0.29164701533441256, "loss_breakdown/lm_loss": 2.047931047854945e-05, "loss_breakdown/pointer_loss": 0.13119667768478394, "step": 2860 }, { "epoch": 0.29164701533441256, "loss_breakdown/lm_loss": 5.485683141159825e-05, "loss_breakdown/pointer_loss": 1.1984692811965942, "step": 2860 }, { "epoch": 0.29164701533441256, "loss_breakdown/lm_loss": 1.9575378246372566e-05, "loss_breakdown/pointer_loss": 1.4031705856323242, "step": 2860 }, { "epoch": 0.29266676014327414, "grad_norm": 2.7534755842338465, "learning_rate": 3.930311614730879e-06, "loss": 0.3132, "step": 2870 }, { "epoch": 0.29266676014327414, "loss_breakdown/lm_loss": 1.735970363370143e-05, "loss_breakdown/pointer_loss": 0.2839401066303253, "step": 2870 }, { "epoch": 0.29266676014327414, "loss_breakdown/lm_loss": 1.4135404853732325e-05, "loss_breakdown/pointer_loss": 0.5364599823951721, "step": 2870 }, { "epoch": 0.29266676014327414, "loss_breakdown/lm_loss": 1.4547255887009669e-05, "loss_breakdown/pointer_loss": 1.2127376794815063, "step": 2870 }, { "epoch": 0.29266676014327414, "loss_breakdown/lm_loss": 1.8420858395984396e-05, "loss_breakdown/pointer_loss": 0.36921533942222595, "step": 2870 }, { "epoch": 0.29266676014327414, "loss_breakdown/lm_loss": 1.4540655683958903e-05, "loss_breakdown/pointer_loss": 0.2901404798030853, "step": 2870 }, { "epoch": 0.29266676014327414, "loss_breakdown/lm_loss": 1.0920858585450333e-05, "loss_breakdown/pointer_loss": 0.622044026851654, "step": 2870 }, { "epoch": 0.29266676014327414, "loss_breakdown/lm_loss": 1.5459536371054128e-05, "loss_breakdown/pointer_loss": 0.5372545719146729, "step": 2870 }, { "epoch": 0.29266676014327414, "loss_breakdown/lm_loss": 1.1409139005991165e-05, "loss_breakdown/pointer_loss": 0.469581663608551, "step": 2870 }, { "epoch": 0.2936865049521357, "grad_norm": 9.886687648345447, "learning_rate": 3.924645892351275e-06, "loss": 0.2818, "step": 2880 }, { "epoch": 0.2936865049521357, "loss_breakdown/lm_loss": 1.33034245664021e-05, "loss_breakdown/pointer_loss": 0.14767612516880035, "step": 2880 }, { "epoch": 0.2936865049521357, "loss_breakdown/lm_loss": 1.5286237612599507e-05, "loss_breakdown/pointer_loss": 0.36011767387390137, "step": 2880 }, { "epoch": 0.2936865049521357, "loss_breakdown/lm_loss": 1.1594853276619688e-05, "loss_breakdown/pointer_loss": 0.06259331107139587, "step": 2880 }, { "epoch": 0.2936865049521357, "loss_breakdown/lm_loss": 1.230608995683724e-05, "loss_breakdown/pointer_loss": 2.5191776752471924, "step": 2880 }, { "epoch": 0.2936865049521357, "loss_breakdown/lm_loss": 1.9211815015296452e-05, "loss_breakdown/pointer_loss": 0.978522002696991, "step": 2880 }, { "epoch": 0.2936865049521357, "loss_breakdown/lm_loss": 1.4912719962012488e-05, "loss_breakdown/pointer_loss": 0.19218094646930695, "step": 2880 }, { "epoch": 0.2936865049521357, "loss_breakdown/lm_loss": 1.4733881471329369e-05, "loss_breakdown/pointer_loss": 0.27707064151763916, "step": 2880 }, { "epoch": 0.2936865049521357, "loss_breakdown/lm_loss": 1.817485099309124e-05, "loss_breakdown/pointer_loss": 0.13431954383850098, "step": 2880 }, { "epoch": 0.2947062497609973, "grad_norm": 2.954212900849386, "learning_rate": 3.918980169971672e-06, "loss": 0.315, "step": 2890 }, { "epoch": 0.2947062497609973, "loss_breakdown/lm_loss": 1.4028511941432953e-05, "loss_breakdown/pointer_loss": 1.6631966829299927, "step": 2890 }, { "epoch": 0.2947062497609973, "loss_breakdown/lm_loss": 1.4211345842340961e-05, "loss_breakdown/pointer_loss": 0.5703490376472473, "step": 2890 }, { "epoch": 0.2947062497609973, "loss_breakdown/lm_loss": 1.3168320947443135e-05, "loss_breakdown/pointer_loss": 0.20128728449344635, "step": 2890 }, { "epoch": 0.2947062497609973, "loss_breakdown/lm_loss": 1.346655062661739e-05, "loss_breakdown/pointer_loss": 1.01651132106781, "step": 2890 }, { "epoch": 0.2947062497609973, "loss_breakdown/lm_loss": 1.1557998732314445e-05, "loss_breakdown/pointer_loss": 0.8102008104324341, "step": 2890 }, { "epoch": 0.2947062497609973, "loss_breakdown/lm_loss": 1.3671147826244123e-05, "loss_breakdown/pointer_loss": 0.6003637313842773, "step": 2890 }, { "epoch": 0.2947062497609973, "loss_breakdown/lm_loss": 1.5542991604888812e-05, "loss_breakdown/pointer_loss": 0.6266191601753235, "step": 2890 }, { "epoch": 0.2947062497609973, "loss_breakdown/lm_loss": 1.5513620382989757e-05, "loss_breakdown/pointer_loss": 0.22370171546936035, "step": 2890 }, { "epoch": 0.2957259945698589, "grad_norm": 5.200429135449425, "learning_rate": 3.913314447592068e-06, "loss": 0.2939, "step": 2900 }, { "epoch": 0.2957259945698589, "loss_breakdown/lm_loss": 5.9510344726732e-05, "loss_breakdown/pointer_loss": 0.4374992251396179, "step": 2900 }, { "epoch": 0.2957259945698589, "loss_breakdown/lm_loss": 5.9989786677761e-05, "loss_breakdown/pointer_loss": 0.5050381422042847, "step": 2900 }, { "epoch": 0.2957259945698589, "loss_breakdown/lm_loss": 1.982250796572771e-05, "loss_breakdown/pointer_loss": 1.098515510559082, "step": 2900 }, { "epoch": 0.2957259945698589, "loss_breakdown/lm_loss": 2.7727581255021505e-05, "loss_breakdown/pointer_loss": 1.0648548603057861, "step": 2900 }, { "epoch": 0.2957259945698589, "loss_breakdown/lm_loss": 2.3851607693359256e-05, "loss_breakdown/pointer_loss": 0.33308643102645874, "step": 2900 }, { "epoch": 0.2957259945698589, "loss_breakdown/lm_loss": 1.6717349353712052e-05, "loss_breakdown/pointer_loss": 0.32074159383773804, "step": 2900 }, { "epoch": 0.2957259945698589, "loss_breakdown/lm_loss": 1.4996667232480831e-05, "loss_breakdown/pointer_loss": 0.5144505500793457, "step": 2900 }, { "epoch": 0.2957259945698589, "loss_breakdown/lm_loss": 2.5219074814231135e-05, "loss_breakdown/pointer_loss": 0.7072017788887024, "step": 2900 }, { "epoch": 0.29674573937872045, "grad_norm": 3.4262231392020834, "learning_rate": 3.907648725212465e-06, "loss": 0.3136, "step": 2910 }, { "epoch": 0.29674573937872045, "loss_breakdown/lm_loss": 1.4205279512680136e-05, "loss_breakdown/pointer_loss": 0.24696041643619537, "step": 2910 }, { "epoch": 0.29674573937872045, "loss_breakdown/lm_loss": 1.7723514247336425e-05, "loss_breakdown/pointer_loss": 0.19067199528217316, "step": 2910 }, { "epoch": 0.29674573937872045, "loss_breakdown/lm_loss": 1.5524483387707733e-05, "loss_breakdown/pointer_loss": 0.047150835394859314, "step": 2910 }, { "epoch": 0.29674573937872045, "loss_breakdown/lm_loss": 1.3245797163108364e-05, "loss_breakdown/pointer_loss": 0.4214489459991455, "step": 2910 }, { "epoch": 0.29674573937872045, "loss_breakdown/lm_loss": 1.590662395756226e-05, "loss_breakdown/pointer_loss": 0.1852378249168396, "step": 2910 }, { "epoch": 0.29674573937872045, "loss_breakdown/lm_loss": 1.4294079846877139e-05, "loss_breakdown/pointer_loss": 0.14174672961235046, "step": 2910 }, { "epoch": 0.29674573937872045, "loss_breakdown/lm_loss": 2.1544115952565335e-05, "loss_breakdown/pointer_loss": 0.23752330243587494, "step": 2910 }, { "epoch": 0.29674573937872045, "loss_breakdown/lm_loss": 2.07416796911275e-05, "loss_breakdown/pointer_loss": 1.907684564590454, "step": 2910 }, { "epoch": 0.29776548418758203, "grad_norm": 2.8892965425136286, "learning_rate": 3.901983002832861e-06, "loss": 0.3023, "step": 2920 }, { "epoch": 0.29776548418758203, "loss_breakdown/lm_loss": 1.978896398213692e-05, "loss_breakdown/pointer_loss": 0.16146615147590637, "step": 2920 }, { "epoch": 0.29776548418758203, "loss_breakdown/lm_loss": 1.503608473285567e-05, "loss_breakdown/pointer_loss": 0.6142574548721313, "step": 2920 }, { "epoch": 0.29776548418758203, "loss_breakdown/lm_loss": 1.73838507180335e-05, "loss_breakdown/pointer_loss": 0.2833261489868164, "step": 2920 }, { "epoch": 0.29776548418758203, "loss_breakdown/lm_loss": 1.4724872016813606e-05, "loss_breakdown/pointer_loss": 1.1985812187194824, "step": 2920 }, { "epoch": 0.29776548418758203, "loss_breakdown/lm_loss": 1.4788464795856271e-05, "loss_breakdown/pointer_loss": 0.30008581280708313, "step": 2920 }, { "epoch": 0.29776548418758203, "loss_breakdown/lm_loss": 1.4837098206044175e-05, "loss_breakdown/pointer_loss": 0.6003395915031433, "step": 2920 }, { "epoch": 0.29776548418758203, "loss_breakdown/lm_loss": 1.2151072951382957e-05, "loss_breakdown/pointer_loss": 1.5125073194503784, "step": 2920 }, { "epoch": 0.29776548418758203, "loss_breakdown/lm_loss": 1.552545109007042e-05, "loss_breakdown/pointer_loss": 0.22375378012657166, "step": 2920 }, { "epoch": 0.29878522899644366, "grad_norm": 4.712102033109772, "learning_rate": 3.896317280453258e-06, "loss": 0.2725, "step": 2930 }, { "epoch": 0.29878522899644366, "loss_breakdown/lm_loss": 1.7997394024860114e-05, "loss_breakdown/pointer_loss": 1.5733234882354736, "step": 2930 }, { "epoch": 0.29878522899644366, "loss_breakdown/lm_loss": 1.3732579645875376e-05, "loss_breakdown/pointer_loss": 0.33322298526763916, "step": 2930 }, { "epoch": 0.29878522899644366, "loss_breakdown/lm_loss": 1.1769657248805743e-05, "loss_breakdown/pointer_loss": 2.947357177734375, "step": 2930 }, { "epoch": 0.29878522899644366, "loss_breakdown/lm_loss": 1.59099272423191e-05, "loss_breakdown/pointer_loss": 0.15832243859767914, "step": 2930 }, { "epoch": 0.29878522899644366, "loss_breakdown/lm_loss": 1.2135249562561512e-05, "loss_breakdown/pointer_loss": 0.14698784053325653, "step": 2930 }, { "epoch": 0.29878522899644366, "loss_breakdown/lm_loss": 1.372182487102691e-05, "loss_breakdown/pointer_loss": 0.09272738546133041, "step": 2930 }, { "epoch": 0.29878522899644366, "loss_breakdown/lm_loss": 1.7821332221501507e-05, "loss_breakdown/pointer_loss": 0.16556954383850098, "step": 2930 }, { "epoch": 0.29878522899644366, "loss_breakdown/lm_loss": 2.7637144739856012e-05, "loss_breakdown/pointer_loss": 0.10948409140110016, "step": 2930 }, { "epoch": 0.29980497380530524, "grad_norm": 2.522034045964704, "learning_rate": 3.890651558073655e-06, "loss": 0.3147, "step": 2940 }, { "epoch": 0.29980497380530524, "loss_breakdown/lm_loss": 1.5200891539279837e-05, "loss_breakdown/pointer_loss": 0.2158149927854538, "step": 2940 }, { "epoch": 0.29980497380530524, "loss_breakdown/lm_loss": 1.1914050446648616e-05, "loss_breakdown/pointer_loss": 0.49916988611221313, "step": 2940 }, { "epoch": 0.29980497380530524, "loss_breakdown/lm_loss": 1.0544962606218178e-05, "loss_breakdown/pointer_loss": 0.40575575828552246, "step": 2940 }, { "epoch": 0.29980497380530524, "loss_breakdown/lm_loss": 1.4821194781688973e-05, "loss_breakdown/pointer_loss": 0.22365383803844452, "step": 2940 }, { "epoch": 0.29980497380530524, "loss_breakdown/lm_loss": 1.1872866707562935e-05, "loss_breakdown/pointer_loss": 0.19418996572494507, "step": 2940 }, { "epoch": 0.29980497380530524, "loss_breakdown/lm_loss": 1.2314014384173788e-05, "loss_breakdown/pointer_loss": 0.4320276379585266, "step": 2940 }, { "epoch": 0.29980497380530524, "loss_breakdown/lm_loss": 1.7727084923535585e-05, "loss_breakdown/pointer_loss": 0.5708606243133545, "step": 2940 }, { "epoch": 0.29980497380530524, "loss_breakdown/lm_loss": 1.527419408375863e-05, "loss_breakdown/pointer_loss": 0.2519177794456482, "step": 2940 }, { "epoch": 0.3008247186141668, "grad_norm": 7.4409757095941345, "learning_rate": 3.884985835694051e-06, "loss": 0.2916, "step": 2950 }, { "epoch": 0.3008247186141668, "loss_breakdown/lm_loss": 0.0001097229469451122, "loss_breakdown/pointer_loss": 1.1422830820083618, "step": 2950 }, { "epoch": 0.3008247186141668, "loss_breakdown/lm_loss": 4.670093403547071e-05, "loss_breakdown/pointer_loss": 0.4978819489479065, "step": 2950 }, { "epoch": 0.3008247186141668, "loss_breakdown/lm_loss": 2.3386150132864714e-05, "loss_breakdown/pointer_loss": 0.21844947338104248, "step": 2950 }, { "epoch": 0.3008247186141668, "loss_breakdown/lm_loss": 1.8390272089163773e-05, "loss_breakdown/pointer_loss": 1.6749365329742432, "step": 2950 }, { "epoch": 0.3008247186141668, "loss_breakdown/lm_loss": 2.0674773622886278e-05, "loss_breakdown/pointer_loss": 1.6887459754943848, "step": 2950 }, { "epoch": 0.3008247186141668, "loss_breakdown/lm_loss": 3.249763176427223e-05, "loss_breakdown/pointer_loss": 0.8009073734283447, "step": 2950 }, { "epoch": 0.3008247186141668, "loss_breakdown/lm_loss": 1.7803566151997074e-05, "loss_breakdown/pointer_loss": 0.48821893334388733, "step": 2950 }, { "epoch": 0.3008247186141668, "loss_breakdown/lm_loss": 1.7473405023338273e-05, "loss_breakdown/pointer_loss": 0.25491994619369507, "step": 2950 }, { "epoch": 0.3018444634230284, "grad_norm": 3.8313933483250637, "learning_rate": 3.879320113314448e-06, "loss": 0.2923, "step": 2960 }, { "epoch": 0.3018444634230284, "loss_breakdown/lm_loss": 2.1458432456711307e-05, "loss_breakdown/pointer_loss": 0.6096274256706238, "step": 2960 }, { "epoch": 0.3018444634230284, "loss_breakdown/lm_loss": 1.4002715033711866e-05, "loss_breakdown/pointer_loss": 0.6049238443374634, "step": 2960 }, { "epoch": 0.3018444634230284, "loss_breakdown/lm_loss": 1.4318002286017872e-05, "loss_breakdown/pointer_loss": 0.30227869749069214, "step": 2960 }, { "epoch": 0.3018444634230284, "loss_breakdown/lm_loss": 1.3777204003417864e-05, "loss_breakdown/pointer_loss": 0.08581535518169403, "step": 2960 }, { "epoch": 0.3018444634230284, "loss_breakdown/lm_loss": 1.6794911061879247e-05, "loss_breakdown/pointer_loss": 0.6446443200111389, "step": 2960 }, { "epoch": 0.3018444634230284, "loss_breakdown/lm_loss": 1.03726097222534e-05, "loss_breakdown/pointer_loss": 0.1694616973400116, "step": 2960 }, { "epoch": 0.3018444634230284, "loss_breakdown/lm_loss": 1.489011356170522e-05, "loss_breakdown/pointer_loss": 0.33647099137306213, "step": 2960 }, { "epoch": 0.3018444634230284, "loss_breakdown/lm_loss": 1.5659761629649438e-05, "loss_breakdown/pointer_loss": 0.3525245189666748, "step": 2960 }, { "epoch": 0.30286420823189, "grad_norm": 3.3800477668895677, "learning_rate": 3.873654390934845e-06, "loss": 0.3014, "step": 2970 }, { "epoch": 0.30286420823189, "loss_breakdown/lm_loss": 2.0214101823512465e-05, "loss_breakdown/pointer_loss": 0.4989897608757019, "step": 2970 }, { "epoch": 0.30286420823189, "loss_breakdown/lm_loss": 1.4254942470870446e-05, "loss_breakdown/pointer_loss": 0.44977015256881714, "step": 2970 }, { "epoch": 0.30286420823189, "loss_breakdown/lm_loss": 2.0002884411951527e-05, "loss_breakdown/pointer_loss": 0.30977267026901245, "step": 2970 }, { "epoch": 0.30286420823189, "loss_breakdown/lm_loss": 1.6267087630694732e-05, "loss_breakdown/pointer_loss": 0.7421692609786987, "step": 2970 }, { "epoch": 0.30286420823189, "loss_breakdown/lm_loss": 1.643639916437678e-05, "loss_breakdown/pointer_loss": 0.39771655201911926, "step": 2970 }, { "epoch": 0.30286420823189, "loss_breakdown/lm_loss": 1.9729675841517746e-05, "loss_breakdown/pointer_loss": 0.18519823253154755, "step": 2970 }, { "epoch": 0.30286420823189, "loss_breakdown/lm_loss": 1.6180953025468625e-05, "loss_breakdown/pointer_loss": 0.9698982238769531, "step": 2970 }, { "epoch": 0.30286420823189, "loss_breakdown/lm_loss": 1.1547958820301574e-05, "loss_breakdown/pointer_loss": 2.199296236038208, "step": 2970 }, { "epoch": 0.30388395304075155, "grad_norm": 7.862991328397219, "learning_rate": 3.867988668555241e-06, "loss": 0.2844, "step": 2980 }, { "epoch": 0.30388395304075155, "loss_breakdown/lm_loss": 1.980344495677855e-05, "loss_breakdown/pointer_loss": 1.089524269104004, "step": 2980 }, { "epoch": 0.30388395304075155, "loss_breakdown/lm_loss": 2.052307900157757e-05, "loss_breakdown/pointer_loss": 0.416546106338501, "step": 2980 }, { "epoch": 0.30388395304075155, "loss_breakdown/lm_loss": 4.1985069401562214e-05, "loss_breakdown/pointer_loss": 0.17046183347702026, "step": 2980 }, { "epoch": 0.30388395304075155, "loss_breakdown/lm_loss": 1.7284775822190568e-05, "loss_breakdown/pointer_loss": 2.131389856338501, "step": 2980 }, { "epoch": 0.30388395304075155, "loss_breakdown/lm_loss": 2.0042189134983346e-05, "loss_breakdown/pointer_loss": 0.20752185583114624, "step": 2980 }, { "epoch": 0.30388395304075155, "loss_breakdown/lm_loss": 2.552048499637749e-05, "loss_breakdown/pointer_loss": 0.09263046085834503, "step": 2980 }, { "epoch": 0.30388395304075155, "loss_breakdown/lm_loss": 0.00010159777593798935, "loss_breakdown/pointer_loss": 0.32400888204574585, "step": 2980 }, { "epoch": 0.30388395304075155, "loss_breakdown/lm_loss": 4.1547991713741794e-05, "loss_breakdown/pointer_loss": 0.18430444598197937, "step": 2980 }, { "epoch": 0.30490369784961313, "grad_norm": 2.045378646231369, "learning_rate": 3.862322946175638e-06, "loss": 0.3158, "step": 2990 }, { "epoch": 0.30490369784961313, "loss_breakdown/lm_loss": 1.6224737919401377e-05, "loss_breakdown/pointer_loss": 0.616445779800415, "step": 2990 }, { "epoch": 0.30490369784961313, "loss_breakdown/lm_loss": 0.0007413756102323532, "loss_breakdown/pointer_loss": 0.25671327114105225, "step": 2990 }, { "epoch": 0.30490369784961313, "loss_breakdown/lm_loss": 1.4744108739250805e-05, "loss_breakdown/pointer_loss": 0.20939108729362488, "step": 2990 }, { "epoch": 0.30490369784961313, "loss_breakdown/lm_loss": 1.570496715430636e-05, "loss_breakdown/pointer_loss": 0.23043090105056763, "step": 2990 }, { "epoch": 0.30490369784961313, "loss_breakdown/lm_loss": 1.8115462808054872e-05, "loss_breakdown/pointer_loss": 0.376107782125473, "step": 2990 }, { "epoch": 0.30490369784961313, "loss_breakdown/lm_loss": 1.849868567660451e-05, "loss_breakdown/pointer_loss": 0.31744399666786194, "step": 2990 }, { "epoch": 0.30490369784961313, "loss_breakdown/lm_loss": 1.4276332876761444e-05, "loss_breakdown/pointer_loss": 0.4744008183479309, "step": 2990 }, { "epoch": 0.30490369784961313, "loss_breakdown/lm_loss": 1.5681027434766293e-05, "loss_breakdown/pointer_loss": 0.6712097525596619, "step": 2990 }, { "epoch": 0.3059234426584747, "grad_norm": 28.223110568498758, "learning_rate": 3.856657223796034e-06, "loss": 0.3091, "step": 3000 }, { "epoch": 0.3059234426584747, "loss_breakdown/lm_loss": 6.281326932366937e-05, "loss_breakdown/pointer_loss": 2.7569336891174316, "step": 3000 }, { "epoch": 0.3059234426584747, "loss_breakdown/lm_loss": 2.5306986572104506e-05, "loss_breakdown/pointer_loss": 0.6921952962875366, "step": 3000 }, { "epoch": 0.3059234426584747, "loss_breakdown/lm_loss": 2.945123014796991e-05, "loss_breakdown/pointer_loss": 0.835365355014801, "step": 3000 }, { "epoch": 0.3059234426584747, "loss_breakdown/lm_loss": 3.9518439734820276e-05, "loss_breakdown/pointer_loss": 0.295927494764328, "step": 3000 }, { "epoch": 0.3059234426584747, "loss_breakdown/lm_loss": 2.0354087610030547e-05, "loss_breakdown/pointer_loss": 0.5729591846466064, "step": 3000 }, { "epoch": 0.3059234426584747, "loss_breakdown/lm_loss": 2.3090093236532994e-05, "loss_breakdown/pointer_loss": 0.36163902282714844, "step": 3000 }, { "epoch": 0.3059234426584747, "loss_breakdown/lm_loss": 2.6891819288721308e-05, "loss_breakdown/pointer_loss": 0.74515700340271, "step": 3000 }, { "epoch": 0.3059234426584747, "loss_breakdown/lm_loss": 2.336383840884082e-05, "loss_breakdown/pointer_loss": 0.8026520609855652, "step": 3000 }, { "epoch": 0.3069431874673363, "grad_norm": 4.591435356856706, "learning_rate": 3.850991501416431e-06, "loss": 0.3019, "step": 3010 }, { "epoch": 0.3069431874673363, "loss_breakdown/lm_loss": 1.0212103916273918e-05, "loss_breakdown/pointer_loss": 0.523347020149231, "step": 3010 }, { "epoch": 0.3069431874673363, "loss_breakdown/lm_loss": 1.4412843484024052e-05, "loss_breakdown/pointer_loss": 0.19729693233966827, "step": 3010 }, { "epoch": 0.3069431874673363, "loss_breakdown/lm_loss": 1.732458986225538e-05, "loss_breakdown/pointer_loss": 0.3204959034919739, "step": 3010 }, { "epoch": 0.3069431874673363, "loss_breakdown/lm_loss": 1.34385463752551e-05, "loss_breakdown/pointer_loss": 0.4397602081298828, "step": 3010 }, { "epoch": 0.3069431874673363, "loss_breakdown/lm_loss": 1.4347011529025622e-05, "loss_breakdown/pointer_loss": 0.6717138886451721, "step": 3010 }, { "epoch": 0.3069431874673363, "loss_breakdown/lm_loss": 1.1837224519695155e-05, "loss_breakdown/pointer_loss": 0.3504469096660614, "step": 3010 }, { "epoch": 0.3069431874673363, "loss_breakdown/lm_loss": 1.8639677364262752e-05, "loss_breakdown/pointer_loss": 0.22365710139274597, "step": 3010 }, { "epoch": 0.3069431874673363, "loss_breakdown/lm_loss": 1.6950954886851832e-05, "loss_breakdown/pointer_loss": 0.36884456872940063, "step": 3010 }, { "epoch": 0.30796293227619786, "grad_norm": 3.601837687468162, "learning_rate": 3.845325779036827e-06, "loss": 0.2971, "step": 3020 }, { "epoch": 0.30796293227619786, "loss_breakdown/lm_loss": 1.4633817045250908e-05, "loss_breakdown/pointer_loss": 0.7796168923377991, "step": 3020 }, { "epoch": 0.30796293227619786, "loss_breakdown/lm_loss": 1.3303879313752986e-05, "loss_breakdown/pointer_loss": 0.25093549489974976, "step": 3020 }, { "epoch": 0.30796293227619786, "loss_breakdown/lm_loss": 2.0286326616769657e-05, "loss_breakdown/pointer_loss": 0.24108368158340454, "step": 3020 }, { "epoch": 0.30796293227619786, "loss_breakdown/lm_loss": 1.6922909708227962e-05, "loss_breakdown/pointer_loss": 1.2571836709976196, "step": 3020 }, { "epoch": 0.30796293227619786, "loss_breakdown/lm_loss": 2.5556479158694856e-05, "loss_breakdown/pointer_loss": 0.40539389848709106, "step": 3020 }, { "epoch": 0.30796293227619786, "loss_breakdown/lm_loss": 1.6203335690079257e-05, "loss_breakdown/pointer_loss": 0.3927608132362366, "step": 3020 }, { "epoch": 0.30796293227619786, "loss_breakdown/lm_loss": 1.155999143520603e-05, "loss_breakdown/pointer_loss": 0.29218578338623047, "step": 3020 }, { "epoch": 0.30796293227619786, "loss_breakdown/lm_loss": 1.2644859452848323e-05, "loss_breakdown/pointer_loss": 0.2494632601737976, "step": 3020 }, { "epoch": 0.30898267708505944, "grad_norm": 5.900577517275596, "learning_rate": 3.839660056657224e-06, "loss": 0.2966, "step": 3030 }, { "epoch": 0.30898267708505944, "loss_breakdown/lm_loss": 1.9998358766315505e-05, "loss_breakdown/pointer_loss": 4.244205474853516, "step": 3030 }, { "epoch": 0.30898267708505944, "loss_breakdown/lm_loss": 2.4488432245561853e-05, "loss_breakdown/pointer_loss": 0.5332163572311401, "step": 3030 }, { "epoch": 0.30898267708505944, "loss_breakdown/lm_loss": 2.164740908483509e-05, "loss_breakdown/pointer_loss": 0.33350446820259094, "step": 3030 }, { "epoch": 0.30898267708505944, "loss_breakdown/lm_loss": 1.3907443644711748e-05, "loss_breakdown/pointer_loss": 0.2500915825366974, "step": 3030 }, { "epoch": 0.30898267708505944, "loss_breakdown/lm_loss": 2.1413145077531226e-05, "loss_breakdown/pointer_loss": 0.11088204383850098, "step": 3030 }, { "epoch": 0.30898267708505944, "loss_breakdown/lm_loss": 1.5051774425955955e-05, "loss_breakdown/pointer_loss": 0.70111083984375, "step": 3030 }, { "epoch": 0.30898267708505944, "loss_breakdown/lm_loss": 1.387558768328745e-05, "loss_breakdown/pointer_loss": 0.1336870640516281, "step": 3030 }, { "epoch": 0.30898267708505944, "loss_breakdown/lm_loss": 2.031627627729904e-05, "loss_breakdown/pointer_loss": 0.21457788348197937, "step": 3030 }, { "epoch": 0.310002421893921, "grad_norm": 2.300549850974101, "learning_rate": 3.833994334277621e-06, "loss": 0.3065, "step": 3040 }, { "epoch": 0.310002421893921, "loss_breakdown/lm_loss": 1.337063167738961e-05, "loss_breakdown/pointer_loss": 0.45818614959716797, "step": 3040 }, { "epoch": 0.310002421893921, "loss_breakdown/lm_loss": 1.5790456018294208e-05, "loss_breakdown/pointer_loss": 0.527190089225769, "step": 3040 }, { "epoch": 0.310002421893921, "loss_breakdown/lm_loss": 2.7784511985373683e-05, "loss_breakdown/pointer_loss": 1.3749607801437378, "step": 3040 }, { "epoch": 0.310002421893921, "loss_breakdown/lm_loss": 2.2388272554962896e-05, "loss_breakdown/pointer_loss": 0.23785091936588287, "step": 3040 }, { "epoch": 0.310002421893921, "loss_breakdown/lm_loss": 1.2605561096279416e-05, "loss_breakdown/pointer_loss": 0.4652656316757202, "step": 3040 }, { "epoch": 0.310002421893921, "loss_breakdown/lm_loss": 1.631266059121117e-05, "loss_breakdown/pointer_loss": 0.10393862426280975, "step": 3040 }, { "epoch": 0.310002421893921, "loss_breakdown/lm_loss": 1.4887099496263545e-05, "loss_breakdown/pointer_loss": 0.1806803047657013, "step": 3040 }, { "epoch": 0.310002421893921, "loss_breakdown/lm_loss": 1.1637584975687787e-05, "loss_breakdown/pointer_loss": 0.2970399856567383, "step": 3040 }, { "epoch": 0.31102216670278265, "grad_norm": 22.727920900815338, "learning_rate": 3.8283286118980175e-06, "loss": 0.2971, "step": 3050 }, { "epoch": 0.31102216670278265, "loss_breakdown/lm_loss": 0.00024734257021918893, "loss_breakdown/pointer_loss": 1.7122178077697754, "step": 3050 }, { "epoch": 0.31102216670278265, "loss_breakdown/lm_loss": 0.0001136098217102699, "loss_breakdown/pointer_loss": 2.2190780639648438, "step": 3050 }, { "epoch": 0.31102216670278265, "loss_breakdown/lm_loss": 3.041207128262613e-05, "loss_breakdown/pointer_loss": 0.6436597108840942, "step": 3050 }, { "epoch": 0.31102216670278265, "loss_breakdown/lm_loss": 3.9596459828317165e-05, "loss_breakdown/pointer_loss": 0.3221365213394165, "step": 3050 }, { "epoch": 0.31102216670278265, "loss_breakdown/lm_loss": 2.60055730905151e-05, "loss_breakdown/pointer_loss": 0.6355538368225098, "step": 3050 }, { "epoch": 0.31102216670278265, "loss_breakdown/lm_loss": 2.3919297746033408e-05, "loss_breakdown/pointer_loss": 0.41482940316200256, "step": 3050 }, { "epoch": 0.31102216670278265, "loss_breakdown/lm_loss": 1.9345265172887594e-05, "loss_breakdown/pointer_loss": 0.2852509021759033, "step": 3050 }, { "epoch": 0.31102216670278265, "loss_breakdown/lm_loss": 2.0229173969710246e-05, "loss_breakdown/pointer_loss": 0.6273820996284485, "step": 3050 }, { "epoch": 0.31204191151164423, "grad_norm": 2.999975444455303, "learning_rate": 3.822662889518414e-06, "loss": 0.3195, "step": 3060 }, { "epoch": 0.31204191151164423, "loss_breakdown/lm_loss": 2.2219568563741632e-05, "loss_breakdown/pointer_loss": 0.26308560371398926, "step": 3060 }, { "epoch": 0.31204191151164423, "loss_breakdown/lm_loss": 1.6729676644899882e-05, "loss_breakdown/pointer_loss": 0.3808720111846924, "step": 3060 }, { "epoch": 0.31204191151164423, "loss_breakdown/lm_loss": 2.3293598133022897e-05, "loss_breakdown/pointer_loss": 0.09211307764053345, "step": 3060 }, { "epoch": 0.31204191151164423, "loss_breakdown/lm_loss": 1.5121244359761477e-05, "loss_breakdown/pointer_loss": 0.33805257081985474, "step": 3060 }, { "epoch": 0.31204191151164423, "loss_breakdown/lm_loss": 1.7001804735627957e-05, "loss_breakdown/pointer_loss": 0.6408956050872803, "step": 3060 }, { "epoch": 0.31204191151164423, "loss_breakdown/lm_loss": 1.213524865306681e-05, "loss_breakdown/pointer_loss": 0.4609975218772888, "step": 3060 }, { "epoch": 0.31204191151164423, "loss_breakdown/lm_loss": 1.708203853922896e-05, "loss_breakdown/pointer_loss": 0.13871246576309204, "step": 3060 }, { "epoch": 0.31204191151164423, "loss_breakdown/lm_loss": 1.804365274438169e-05, "loss_breakdown/pointer_loss": 1.9770139455795288, "step": 3060 }, { "epoch": 0.3130616563205058, "grad_norm": 6.481935767804864, "learning_rate": 3.8169971671388105e-06, "loss": 0.2907, "step": 3070 }, { "epoch": 0.3130616563205058, "loss_breakdown/lm_loss": 1.734434044919908e-05, "loss_breakdown/pointer_loss": 0.9758782982826233, "step": 3070 }, { "epoch": 0.3130616563205058, "loss_breakdown/lm_loss": 1.7107642634073272e-05, "loss_breakdown/pointer_loss": 1.501295804977417, "step": 3070 }, { "epoch": 0.3130616563205058, "loss_breakdown/lm_loss": 1.3023440260440111e-05, "loss_breakdown/pointer_loss": 0.8103533983230591, "step": 3070 }, { "epoch": 0.3130616563205058, "loss_breakdown/lm_loss": 1.2897779924969655e-05, "loss_breakdown/pointer_loss": 0.24923807382583618, "step": 3070 }, { "epoch": 0.3130616563205058, "loss_breakdown/lm_loss": 1.5592313502565958e-05, "loss_breakdown/pointer_loss": 0.8646937608718872, "step": 3070 }, { "epoch": 0.3130616563205058, "loss_breakdown/lm_loss": 1.8873961380450055e-05, "loss_breakdown/pointer_loss": 0.2560328245162964, "step": 3070 }, { "epoch": 0.3130616563205058, "loss_breakdown/lm_loss": 1.6228972526732832e-05, "loss_breakdown/pointer_loss": 0.26102545857429504, "step": 3070 }, { "epoch": 0.3130616563205058, "loss_breakdown/lm_loss": 1.884854464151431e-05, "loss_breakdown/pointer_loss": 0.4886627793312073, "step": 3070 }, { "epoch": 0.3140814011293674, "grad_norm": 7.6234779224499265, "learning_rate": 3.811331444759207e-06, "loss": 0.2897, "step": 3080 }, { "epoch": 0.3140814011293674, "loss_breakdown/lm_loss": 1.8218648619949818e-05, "loss_breakdown/pointer_loss": 0.4439307451248169, "step": 3080 }, { "epoch": 0.3140814011293674, "loss_breakdown/lm_loss": 1.5790812540217303e-05, "loss_breakdown/pointer_loss": 0.1855035126209259, "step": 3080 }, { "epoch": 0.3140814011293674, "loss_breakdown/lm_loss": 2.3394968593493104e-05, "loss_breakdown/pointer_loss": 0.18364617228507996, "step": 3080 }, { "epoch": 0.3140814011293674, "loss_breakdown/lm_loss": 3.131690391455777e-05, "loss_breakdown/pointer_loss": 0.604115903377533, "step": 3080 }, { "epoch": 0.3140814011293674, "loss_breakdown/lm_loss": 3.563104473869316e-05, "loss_breakdown/pointer_loss": 0.44998955726623535, "step": 3080 }, { "epoch": 0.3140814011293674, "loss_breakdown/lm_loss": 2.1194666260271333e-05, "loss_breakdown/pointer_loss": 0.27901649475097656, "step": 3080 }, { "epoch": 0.3140814011293674, "loss_breakdown/lm_loss": 1.3545834008255042e-05, "loss_breakdown/pointer_loss": 0.11395540833473206, "step": 3080 }, { "epoch": 0.3140814011293674, "loss_breakdown/lm_loss": 1.874684494396206e-05, "loss_breakdown/pointer_loss": 0.08636188507080078, "step": 3080 }, { "epoch": 0.31510114593822897, "grad_norm": 2.3929402210488315, "learning_rate": 3.805665722379604e-06, "loss": 0.3171, "step": 3090 }, { "epoch": 0.31510114593822897, "loss_breakdown/lm_loss": 1.8260465367347933e-05, "loss_breakdown/pointer_loss": 0.39206984639167786, "step": 3090 }, { "epoch": 0.31510114593822897, "loss_breakdown/lm_loss": 1.4072064004722051e-05, "loss_breakdown/pointer_loss": 0.2778840959072113, "step": 3090 }, { "epoch": 0.31510114593822897, "loss_breakdown/lm_loss": 1.2238535418873653e-05, "loss_breakdown/pointer_loss": 0.1056068167090416, "step": 3090 }, { "epoch": 0.31510114593822897, "loss_breakdown/lm_loss": 1.1877583347086329e-05, "loss_breakdown/pointer_loss": 0.29743003845214844, "step": 3090 }, { "epoch": 0.31510114593822897, "loss_breakdown/lm_loss": 1.542550126032438e-05, "loss_breakdown/pointer_loss": 0.6769434809684753, "step": 3090 }, { "epoch": 0.31510114593822897, "loss_breakdown/lm_loss": 1.0943173037958331e-05, "loss_breakdown/pointer_loss": 0.26618853211402893, "step": 3090 }, { "epoch": 0.31510114593822897, "loss_breakdown/lm_loss": 1.1745782103389502e-05, "loss_breakdown/pointer_loss": 0.12095591425895691, "step": 3090 }, { "epoch": 0.31510114593822897, "loss_breakdown/lm_loss": 1.181735842692433e-05, "loss_breakdown/pointer_loss": 0.3083902597427368, "step": 3090 }, { "epoch": 0.31612089074709054, "grad_norm": 7.731759522620564, "learning_rate": 3.8000000000000005e-06, "loss": 0.2912, "step": 3100 }, { "epoch": 0.31612089074709054, "loss_breakdown/lm_loss": 0.00012459517165552825, "loss_breakdown/pointer_loss": 3.205477714538574, "step": 3100 }, { "epoch": 0.31612089074709054, "loss_breakdown/lm_loss": 3.7067140510771424e-05, "loss_breakdown/pointer_loss": 1.5241371393203735, "step": 3100 }, { "epoch": 0.31612089074709054, "loss_breakdown/lm_loss": 3.316396760055795e-05, "loss_breakdown/pointer_loss": 1.8160754442214966, "step": 3100 }, { "epoch": 0.31612089074709054, "loss_breakdown/lm_loss": 2.05064734473126e-05, "loss_breakdown/pointer_loss": 0.8798865079879761, "step": 3100 }, { "epoch": 0.31612089074709054, "loss_breakdown/lm_loss": 3.566238592611626e-05, "loss_breakdown/pointer_loss": 0.2947126626968384, "step": 3100 }, { "epoch": 0.31612089074709054, "loss_breakdown/lm_loss": 2.6098592570633627e-05, "loss_breakdown/pointer_loss": 0.5483936071395874, "step": 3100 }, { "epoch": 0.31612089074709054, "loss_breakdown/lm_loss": 3.363890209584497e-05, "loss_breakdown/pointer_loss": 0.6439962387084961, "step": 3100 }, { "epoch": 0.31612089074709054, "loss_breakdown/lm_loss": 1.9792474631685764e-05, "loss_breakdown/pointer_loss": 0.879868745803833, "step": 3100 }, { "epoch": 0.3171406355559521, "grad_norm": 6.344150400778311, "learning_rate": 3.7943342776203965e-06, "loss": 0.2866, "step": 3110 }, { "epoch": 0.3171406355559521, "loss_breakdown/lm_loss": 1.1993147381872404e-05, "loss_breakdown/pointer_loss": 0.46437904238700867, "step": 3110 }, { "epoch": 0.3171406355559521, "loss_breakdown/lm_loss": 1.5964062185958028e-05, "loss_breakdown/pointer_loss": 0.1524694710969925, "step": 3110 }, { "epoch": 0.3171406355559521, "loss_breakdown/lm_loss": 1.466341745981481e-05, "loss_breakdown/pointer_loss": 0.2869275212287903, "step": 3110 }, { "epoch": 0.3171406355559521, "loss_breakdown/lm_loss": 2.5955061573768035e-05, "loss_breakdown/pointer_loss": 0.4558759927749634, "step": 3110 }, { "epoch": 0.3171406355559521, "loss_breakdown/lm_loss": 1.2359285392449237e-05, "loss_breakdown/pointer_loss": 0.2359994351863861, "step": 3110 }, { "epoch": 0.3171406355559521, "loss_breakdown/lm_loss": 3.199608909199014e-05, "loss_breakdown/pointer_loss": 3.438328742980957, "step": 3110 }, { "epoch": 0.3171406355559521, "loss_breakdown/lm_loss": 2.9656835977220908e-05, "loss_breakdown/pointer_loss": 0.5198308825492859, "step": 3110 }, { "epoch": 0.3171406355559521, "loss_breakdown/lm_loss": 1.7125761587521993e-05, "loss_breakdown/pointer_loss": 0.15699252486228943, "step": 3110 }, { "epoch": 0.3181603803648137, "grad_norm": 2.728846508588581, "learning_rate": 3.788668555240794e-06, "loss": 0.3289, "step": 3120 }, { "epoch": 0.3181603803648137, "loss_breakdown/lm_loss": 1.777405668690335e-05, "loss_breakdown/pointer_loss": 0.5700981616973877, "step": 3120 }, { "epoch": 0.3181603803648137, "loss_breakdown/lm_loss": 1.4910825484548695e-05, "loss_breakdown/pointer_loss": 1.359163522720337, "step": 3120 }, { "epoch": 0.3181603803648137, "loss_breakdown/lm_loss": 1.4167098925099708e-05, "loss_breakdown/pointer_loss": 0.5987521409988403, "step": 3120 }, { "epoch": 0.3181603803648137, "loss_breakdown/lm_loss": 1.7266338545596227e-05, "loss_breakdown/pointer_loss": 0.19353286921977997, "step": 3120 }, { "epoch": 0.3181603803648137, "loss_breakdown/lm_loss": 1.6502997823408805e-05, "loss_breakdown/pointer_loss": 1.0335983037948608, "step": 3120 }, { "epoch": 0.3181603803648137, "loss_breakdown/lm_loss": 1.4239219126466196e-05, "loss_breakdown/pointer_loss": 0.8158389925956726, "step": 3120 }, { "epoch": 0.3181603803648137, "loss_breakdown/lm_loss": 1.594095374457538e-05, "loss_breakdown/pointer_loss": 0.37632355093955994, "step": 3120 }, { "epoch": 0.3181603803648137, "loss_breakdown/lm_loss": 1.2562589290610049e-05, "loss_breakdown/pointer_loss": 0.41928136348724365, "step": 3120 }, { "epoch": 0.3191801251736753, "grad_norm": 12.702443008014125, "learning_rate": 3.78300283286119e-06, "loss": 0.2685, "step": 3130 }, { "epoch": 0.3191801251736753, "loss_breakdown/lm_loss": 1.2095488273189403e-05, "loss_breakdown/pointer_loss": 0.27804723381996155, "step": 3130 }, { "epoch": 0.3191801251736753, "loss_breakdown/lm_loss": 1.3521974324248731e-05, "loss_breakdown/pointer_loss": 0.11536432802677155, "step": 3130 }, { "epoch": 0.3191801251736753, "loss_breakdown/lm_loss": 3.1328130717156455e-05, "loss_breakdown/pointer_loss": 2.237591505050659, "step": 3130 }, { "epoch": 0.3191801251736753, "loss_breakdown/lm_loss": 3.404619201319292e-05, "loss_breakdown/pointer_loss": 0.19869428873062134, "step": 3130 }, { "epoch": 0.3191801251736753, "loss_breakdown/lm_loss": 2.5934305085684173e-05, "loss_breakdown/pointer_loss": 0.17612726986408234, "step": 3130 }, { "epoch": 0.3191801251736753, "loss_breakdown/lm_loss": 1.4225268387235701e-05, "loss_breakdown/pointer_loss": 3.3486359119415283, "step": 3130 }, { "epoch": 0.3191801251736753, "loss_breakdown/lm_loss": 1.6545267499168403e-05, "loss_breakdown/pointer_loss": 0.15165403485298157, "step": 3130 }, { "epoch": 0.3191801251736753, "loss_breakdown/lm_loss": 2.404721817583777e-05, "loss_breakdown/pointer_loss": 0.6248300075531006, "step": 3130 }, { "epoch": 0.32019986998253686, "grad_norm": 5.14192103596408, "learning_rate": 3.7773371104815865e-06, "loss": 0.3049, "step": 3140 }, { "epoch": 0.32019986998253686, "loss_breakdown/lm_loss": 1.0886750715144444e-05, "loss_breakdown/pointer_loss": 0.16567397117614746, "step": 3140 }, { "epoch": 0.32019986998253686, "loss_breakdown/lm_loss": 1.0056432074634358e-05, "loss_breakdown/pointer_loss": 0.1860034465789795, "step": 3140 }, { "epoch": 0.32019986998253686, "loss_breakdown/lm_loss": 1.1954050933127292e-05, "loss_breakdown/pointer_loss": 0.6225647926330566, "step": 3140 }, { "epoch": 0.32019986998253686, "loss_breakdown/lm_loss": 9.34283798414981e-06, "loss_breakdown/pointer_loss": 0.5258073806762695, "step": 3140 }, { "epoch": 0.32019986998253686, "loss_breakdown/lm_loss": 1.3235204278316814e-05, "loss_breakdown/pointer_loss": 0.4995782971382141, "step": 3140 }, { "epoch": 0.32019986998253686, "loss_breakdown/lm_loss": 1.106336549128173e-05, "loss_breakdown/pointer_loss": 0.24029554426670074, "step": 3140 }, { "epoch": 0.32019986998253686, "loss_breakdown/lm_loss": 1.3261364983918611e-05, "loss_breakdown/pointer_loss": 1.4444087743759155, "step": 3140 }, { "epoch": 0.32019986998253686, "loss_breakdown/lm_loss": 1.1801444088632707e-05, "loss_breakdown/pointer_loss": 0.5263057947158813, "step": 3140 }, { "epoch": 0.32121961479139843, "grad_norm": 9.20018979021997, "learning_rate": 3.7716713881019834e-06, "loss": 0.2847, "step": 3150 }, { "epoch": 0.32121961479139843, "loss_breakdown/lm_loss": 8.852069004205987e-05, "loss_breakdown/pointer_loss": 3.015476703643799, "step": 3150 }, { "epoch": 0.32121961479139843, "loss_breakdown/lm_loss": 2.036672412941698e-05, "loss_breakdown/pointer_loss": 0.6028092503547668, "step": 3150 }, { "epoch": 0.32121961479139843, "loss_breakdown/lm_loss": 1.9046012312173843e-05, "loss_breakdown/pointer_loss": 0.7686420679092407, "step": 3150 }, { "epoch": 0.32121961479139843, "loss_breakdown/lm_loss": 1.993134901567828e-05, "loss_breakdown/pointer_loss": 1.3344841003417969, "step": 3150 }, { "epoch": 0.32121961479139843, "loss_breakdown/lm_loss": 1.8657041437109e-05, "loss_breakdown/pointer_loss": 0.874275803565979, "step": 3150 }, { "epoch": 0.32121961479139843, "loss_breakdown/lm_loss": 1.3682356438948773e-05, "loss_breakdown/pointer_loss": 0.5521467328071594, "step": 3150 }, { "epoch": 0.32121961479139843, "loss_breakdown/lm_loss": 1.326482743024826e-05, "loss_breakdown/pointer_loss": 0.7962970733642578, "step": 3150 }, { "epoch": 0.32121961479139843, "loss_breakdown/lm_loss": 1.3655316251970362e-05, "loss_breakdown/pointer_loss": 0.6746425032615662, "step": 3150 }, { "epoch": 0.32223935960026, "grad_norm": 2.7841586613120644, "learning_rate": 3.76600566572238e-06, "loss": 0.2794, "step": 3160 }, { "epoch": 0.32223935960026, "loss_breakdown/lm_loss": 1.4407974958885461e-05, "loss_breakdown/pointer_loss": 0.5876954793930054, "step": 3160 }, { "epoch": 0.32223935960026, "loss_breakdown/lm_loss": 1.0752390153356828e-05, "loss_breakdown/pointer_loss": 0.1367063969373703, "step": 3160 }, { "epoch": 0.32223935960026, "loss_breakdown/lm_loss": 1.1100139090558514e-05, "loss_breakdown/pointer_loss": 0.2978171706199646, "step": 3160 }, { "epoch": 0.32223935960026, "loss_breakdown/lm_loss": 1.0796178685268387e-05, "loss_breakdown/pointer_loss": 0.34365230798721313, "step": 3160 }, { "epoch": 0.32223935960026, "loss_breakdown/lm_loss": 1.3196069630794227e-05, "loss_breakdown/pointer_loss": 0.17901280522346497, "step": 3160 }, { "epoch": 0.32223935960026, "loss_breakdown/lm_loss": 1.21947505249409e-05, "loss_breakdown/pointer_loss": 0.4140376150608063, "step": 3160 }, { "epoch": 0.32223935960026, "loss_breakdown/lm_loss": 3.303294215584174e-05, "loss_breakdown/pointer_loss": 0.36817729473114014, "step": 3160 }, { "epoch": 0.32223935960026, "loss_breakdown/lm_loss": 1.810727917472832e-05, "loss_breakdown/pointer_loss": 1.2948553562164307, "step": 3160 }, { "epoch": 0.32325910440912164, "grad_norm": 2.2918761191186814, "learning_rate": 3.760339943342777e-06, "loss": 0.3042, "step": 3170 }, { "epoch": 0.32325910440912164, "loss_breakdown/lm_loss": 1.2826278180000372e-05, "loss_breakdown/pointer_loss": 0.47490960359573364, "step": 3170 }, { "epoch": 0.32325910440912164, "loss_breakdown/lm_loss": 1.1531732525327243e-05, "loss_breakdown/pointer_loss": 0.39044296741485596, "step": 3170 }, { "epoch": 0.32325910440912164, "loss_breakdown/lm_loss": 1.0282968105457257e-05, "loss_breakdown/pointer_loss": 0.7192046642303467, "step": 3170 }, { "epoch": 0.32325910440912164, "loss_breakdown/lm_loss": 1.5407162209157832e-05, "loss_breakdown/pointer_loss": 0.49913734197616577, "step": 3170 }, { "epoch": 0.32325910440912164, "loss_breakdown/lm_loss": 1.501870428910479e-05, "loss_breakdown/pointer_loss": 0.7005698680877686, "step": 3170 }, { "epoch": 0.32325910440912164, "loss_breakdown/lm_loss": 1.035406967275776e-05, "loss_breakdown/pointer_loss": 0.34613245725631714, "step": 3170 }, { "epoch": 0.32325910440912164, "loss_breakdown/lm_loss": 1.1434869520599023e-05, "loss_breakdown/pointer_loss": 0.33656662702560425, "step": 3170 }, { "epoch": 0.32325910440912164, "loss_breakdown/lm_loss": 1.4549906154570635e-05, "loss_breakdown/pointer_loss": 0.5778061747550964, "step": 3170 }, { "epoch": 0.3242788492179832, "grad_norm": 3.6163281010615096, "learning_rate": 3.7546742209631733e-06, "loss": 0.3026, "step": 3180 }, { "epoch": 0.3242788492179832, "loss_breakdown/lm_loss": 2.2640319002675824e-05, "loss_breakdown/pointer_loss": 0.14919763803482056, "step": 3180 }, { "epoch": 0.3242788492179832, "loss_breakdown/lm_loss": 1.587828046467621e-05, "loss_breakdown/pointer_loss": 0.17808270454406738, "step": 3180 }, { "epoch": 0.3242788492179832, "loss_breakdown/lm_loss": 2.6179157430306077e-05, "loss_breakdown/pointer_loss": 0.1359395980834961, "step": 3180 }, { "epoch": 0.3242788492179832, "loss_breakdown/lm_loss": 1.7201331502292305e-05, "loss_breakdown/pointer_loss": 2.5435662269592285, "step": 3180 }, { "epoch": 0.3242788492179832, "loss_breakdown/lm_loss": 3.1702977139502764e-05, "loss_breakdown/pointer_loss": 2.318783760070801, "step": 3180 }, { "epoch": 0.3242788492179832, "loss_breakdown/lm_loss": 2.532656435505487e-05, "loss_breakdown/pointer_loss": 0.42023468017578125, "step": 3180 }, { "epoch": 0.3242788492179832, "loss_breakdown/lm_loss": 1.4833112800261006e-05, "loss_breakdown/pointer_loss": 2.914353132247925, "step": 3180 }, { "epoch": 0.3242788492179832, "loss_breakdown/lm_loss": 2.336366924282629e-05, "loss_breakdown/pointer_loss": 0.04986433684825897, "step": 3180 }, { "epoch": 0.3252985940268448, "grad_norm": 3.360759956036436, "learning_rate": 3.7490084985835694e-06, "loss": 0.2806, "step": 3190 }, { "epoch": 0.3252985940268448, "loss_breakdown/lm_loss": 2.8392869353410788e-05, "loss_breakdown/pointer_loss": 0.23489144444465637, "step": 3190 }, { "epoch": 0.3252985940268448, "loss_breakdown/lm_loss": 1.2605672054633033e-05, "loss_breakdown/pointer_loss": 0.41941481828689575, "step": 3190 }, { "epoch": 0.3252985940268448, "loss_breakdown/lm_loss": 1.3043731087236665e-05, "loss_breakdown/pointer_loss": 0.7188913226127625, "step": 3190 }, { "epoch": 0.3252985940268448, "loss_breakdown/lm_loss": 1.2838226211897563e-05, "loss_breakdown/pointer_loss": 0.4735552668571472, "step": 3190 }, { "epoch": 0.3252985940268448, "loss_breakdown/lm_loss": 1.166266156360507e-05, "loss_breakdown/pointer_loss": 0.5246490836143494, "step": 3190 }, { "epoch": 0.3252985940268448, "loss_breakdown/lm_loss": 1.2466319276427384e-05, "loss_breakdown/pointer_loss": 0.7535678148269653, "step": 3190 }, { "epoch": 0.3252985940268448, "loss_breakdown/lm_loss": 1.3915918316342868e-05, "loss_breakdown/pointer_loss": 0.3086164593696594, "step": 3190 }, { "epoch": 0.3252985940268448, "loss_breakdown/lm_loss": 9.90644457488088e-06, "loss_breakdown/pointer_loss": 0.13781356811523438, "step": 3190 }, { "epoch": 0.3263183388357064, "grad_norm": 11.993704948605087, "learning_rate": 3.7433427762039663e-06, "loss": 0.2972, "step": 3200 }, { "epoch": 0.3263183388357064, "loss_breakdown/lm_loss": 3.111087062279694e-05, "loss_breakdown/pointer_loss": 1.36314857006073, "step": 3200 }, { "epoch": 0.3263183388357064, "loss_breakdown/lm_loss": 1.836472438299097e-05, "loss_breakdown/pointer_loss": 0.6693145632743835, "step": 3200 }, { "epoch": 0.3263183388357064, "loss_breakdown/lm_loss": 1.934109786816407e-05, "loss_breakdown/pointer_loss": 1.6950538158416748, "step": 3200 }, { "epoch": 0.3263183388357064, "loss_breakdown/lm_loss": 1.645999691390898e-05, "loss_breakdown/pointer_loss": 1.3108981847763062, "step": 3200 }, { "epoch": 0.3263183388357064, "loss_breakdown/lm_loss": 1.4394161553354934e-05, "loss_breakdown/pointer_loss": 0.6605764031410217, "step": 3200 }, { "epoch": 0.3263183388357064, "loss_breakdown/lm_loss": 2.0802332073799334e-05, "loss_breakdown/pointer_loss": 1.0479395389556885, "step": 3200 }, { "epoch": 0.3263183388357064, "loss_breakdown/lm_loss": 1.1725222066161223e-05, "loss_breakdown/pointer_loss": 0.45102381706237793, "step": 3200 }, { "epoch": 0.3263183388357064, "loss_breakdown/lm_loss": 1.5964420526870526e-05, "loss_breakdown/pointer_loss": 0.653143048286438, "step": 3200 }, { "epoch": 0.32733808364456796, "grad_norm": 4.810781088702207, "learning_rate": 3.737677053824363e-06, "loss": 0.2849, "step": 3210 }, { "epoch": 0.32733808364456796, "loss_breakdown/lm_loss": 1.628708378120791e-05, "loss_breakdown/pointer_loss": 0.6157593727111816, "step": 3210 }, { "epoch": 0.32733808364456796, "loss_breakdown/lm_loss": 1.5617792087141424e-05, "loss_breakdown/pointer_loss": 0.1881345957517624, "step": 3210 }, { "epoch": 0.32733808364456796, "loss_breakdown/lm_loss": 1.6486057575093582e-05, "loss_breakdown/pointer_loss": 1.703420639038086, "step": 3210 }, { "epoch": 0.32733808364456796, "loss_breakdown/lm_loss": 2.1960411686450243e-05, "loss_breakdown/pointer_loss": 0.258867472410202, "step": 3210 }, { "epoch": 0.32733808364456796, "loss_breakdown/lm_loss": 1.1661077223834582e-05, "loss_breakdown/pointer_loss": 0.3607144057750702, "step": 3210 }, { "epoch": 0.32733808364456796, "loss_breakdown/lm_loss": 1.6384508853661828e-05, "loss_breakdown/pointer_loss": 0.16201233863830566, "step": 3210 }, { "epoch": 0.32733808364456796, "loss_breakdown/lm_loss": 2.450028659950476e-05, "loss_breakdown/pointer_loss": 0.25136780738830566, "step": 3210 }, { "epoch": 0.32733808364456796, "loss_breakdown/lm_loss": 2.192573265347164e-05, "loss_breakdown/pointer_loss": 0.4480498731136322, "step": 3210 }, { "epoch": 0.32835782845342953, "grad_norm": 3.7167235585147758, "learning_rate": 3.7320113314447594e-06, "loss": 0.2826, "step": 3220 }, { "epoch": 0.32835782845342953, "loss_breakdown/lm_loss": 1.1996098692179658e-05, "loss_breakdown/pointer_loss": 0.36765411496162415, "step": 3220 }, { "epoch": 0.32835782845342953, "loss_breakdown/lm_loss": 1.2149311260145623e-05, "loss_breakdown/pointer_loss": 0.5975976586341858, "step": 3220 }, { "epoch": 0.32835782845342953, "loss_breakdown/lm_loss": 1.4854523215035442e-05, "loss_breakdown/pointer_loss": 0.23807936906814575, "step": 3220 }, { "epoch": 0.32835782845342953, "loss_breakdown/lm_loss": 1.5452656953129917e-05, "loss_breakdown/pointer_loss": 0.8007128238677979, "step": 3220 }, { "epoch": 0.32835782845342953, "loss_breakdown/lm_loss": 9.798293831408955e-06, "loss_breakdown/pointer_loss": 0.7656975984573364, "step": 3220 }, { "epoch": 0.32835782845342953, "loss_breakdown/lm_loss": 1.228202017955482e-05, "loss_breakdown/pointer_loss": 0.5784558057785034, "step": 3220 }, { "epoch": 0.32835782845342953, "loss_breakdown/lm_loss": 1.4663460206065793e-05, "loss_breakdown/pointer_loss": 0.5282014012336731, "step": 3220 }, { "epoch": 0.32835782845342953, "loss_breakdown/lm_loss": 1.1893156624864787e-05, "loss_breakdown/pointer_loss": 0.77495938539505, "step": 3220 }, { "epoch": 0.3293775732622911, "grad_norm": 4.799284076234878, "learning_rate": 3.7263456090651563e-06, "loss": 0.2642, "step": 3230 }, { "epoch": 0.3293775732622911, "loss_breakdown/lm_loss": 9.727305041451473e-06, "loss_breakdown/pointer_loss": 0.25120919942855835, "step": 3230 }, { "epoch": 0.3293775732622911, "loss_breakdown/lm_loss": 1.9998562493128702e-05, "loss_breakdown/pointer_loss": 1.8705756664276123, "step": 3230 }, { "epoch": 0.3293775732622911, "loss_breakdown/lm_loss": 6.861469591967762e-05, "loss_breakdown/pointer_loss": 0.07542794197797775, "step": 3230 }, { "epoch": 0.3293775732622911, "loss_breakdown/lm_loss": 1.1336567695252597e-05, "loss_breakdown/pointer_loss": 0.07455748319625854, "step": 3230 }, { "epoch": 0.3293775732622911, "loss_breakdown/lm_loss": 1.8357615772401914e-05, "loss_breakdown/pointer_loss": 0.2866317629814148, "step": 3230 }, { "epoch": 0.3293775732622911, "loss_breakdown/lm_loss": 1.3903373655921314e-05, "loss_breakdown/pointer_loss": 0.23368532955646515, "step": 3230 }, { "epoch": 0.3293775732622911, "loss_breakdown/lm_loss": 1.0522006959945429e-05, "loss_breakdown/pointer_loss": 0.42550814151763916, "step": 3230 }, { "epoch": 0.3293775732622911, "loss_breakdown/lm_loss": 2.824210241669789e-05, "loss_breakdown/pointer_loss": 0.5921854376792908, "step": 3230 }, { "epoch": 0.3303973180711527, "grad_norm": 3.99391234028294, "learning_rate": 3.7206798866855528e-06, "loss": 0.2904, "step": 3240 }, { "epoch": 0.3303973180711527, "loss_breakdown/lm_loss": 1.69643280969467e-05, "loss_breakdown/pointer_loss": 0.3808409571647644, "step": 3240 }, { "epoch": 0.3303973180711527, "loss_breakdown/lm_loss": 1.303001590713393e-05, "loss_breakdown/pointer_loss": 0.4128754734992981, "step": 3240 }, { "epoch": 0.3303973180711527, "loss_breakdown/lm_loss": 9.805156878428534e-06, "loss_breakdown/pointer_loss": 0.11241677403450012, "step": 3240 }, { "epoch": 0.3303973180711527, "loss_breakdown/lm_loss": 1.1941870980081148e-05, "loss_breakdown/pointer_loss": 0.20446208119392395, "step": 3240 }, { "epoch": 0.3303973180711527, "loss_breakdown/lm_loss": 1.8489037756808102e-05, "loss_breakdown/pointer_loss": 0.21474140882492065, "step": 3240 }, { "epoch": 0.3303973180711527, "loss_breakdown/lm_loss": 1.0486208338988945e-05, "loss_breakdown/pointer_loss": 0.3668145537376404, "step": 3240 }, { "epoch": 0.3303973180711527, "loss_breakdown/lm_loss": 9.367032362206373e-06, "loss_breakdown/pointer_loss": 0.22381387650966644, "step": 3240 }, { "epoch": 0.3303973180711527, "loss_breakdown/lm_loss": 1.3421442417893559e-05, "loss_breakdown/pointer_loss": 1.036310076713562, "step": 3240 }, { "epoch": 0.33141706288001427, "grad_norm": 16.976658420380613, "learning_rate": 3.7150141643059497e-06, "loss": 0.3167, "step": 3250 }, { "epoch": 0.33141706288001427, "loss_breakdown/lm_loss": 6.305447459453717e-05, "loss_breakdown/pointer_loss": 1.0070074796676636, "step": 3250 }, { "epoch": 0.33141706288001427, "loss_breakdown/lm_loss": 2.08310939342482e-05, "loss_breakdown/pointer_loss": 1.0322390794754028, "step": 3250 }, { "epoch": 0.33141706288001427, "loss_breakdown/lm_loss": 1.7220465451828204e-05, "loss_breakdown/pointer_loss": 0.88823401927948, "step": 3250 }, { "epoch": 0.33141706288001427, "loss_breakdown/lm_loss": 1.905623503262177e-05, "loss_breakdown/pointer_loss": 0.6824647188186646, "step": 3250 }, { "epoch": 0.33141706288001427, "loss_breakdown/lm_loss": 1.621101182536222e-05, "loss_breakdown/pointer_loss": 0.6098565459251404, "step": 3250 }, { "epoch": 0.33141706288001427, "loss_breakdown/lm_loss": 2.1997539079166017e-05, "loss_breakdown/pointer_loss": 0.9415910243988037, "step": 3250 }, { "epoch": 0.33141706288001427, "loss_breakdown/lm_loss": 1.3421416952041909e-05, "loss_breakdown/pointer_loss": 1.6962933540344238, "step": 3250 }, { "epoch": 0.33141706288001427, "loss_breakdown/lm_loss": 1.288447037950391e-05, "loss_breakdown/pointer_loss": 0.37020769715309143, "step": 3250 }, { "epoch": 0.33243680768887585, "grad_norm": 4.427066436486327, "learning_rate": 3.709348441926346e-06, "loss": 0.2922, "step": 3260 }, { "epoch": 0.33243680768887585, "loss_breakdown/lm_loss": 1.4463435945799574e-05, "loss_breakdown/pointer_loss": 1.3660249710083008, "step": 3260 }, { "epoch": 0.33243680768887585, "loss_breakdown/lm_loss": 1.1491482837300282e-05, "loss_breakdown/pointer_loss": 0.22434702515602112, "step": 3260 }, { "epoch": 0.33243680768887585, "loss_breakdown/lm_loss": 1.4582888070435729e-05, "loss_breakdown/pointer_loss": 0.4689721465110779, "step": 3260 }, { "epoch": 0.33243680768887585, "loss_breakdown/lm_loss": 1.6071537174866535e-05, "loss_breakdown/pointer_loss": 0.6577763557434082, "step": 3260 }, { "epoch": 0.33243680768887585, "loss_breakdown/lm_loss": 1.448092643840937e-05, "loss_breakdown/pointer_loss": 0.3846230208873749, "step": 3260 }, { "epoch": 0.33243680768887585, "loss_breakdown/lm_loss": 9.878322089207359e-06, "loss_breakdown/pointer_loss": 0.7644031047821045, "step": 3260 }, { "epoch": 0.33243680768887585, "loss_breakdown/lm_loss": 1.93107953236904e-05, "loss_breakdown/pointer_loss": 6.360037803649902, "step": 3260 }, { "epoch": 0.33243680768887585, "loss_breakdown/lm_loss": 1.2882292139693163e-05, "loss_breakdown/pointer_loss": 0.15799687802791595, "step": 3260 }, { "epoch": 0.3334565524977374, "grad_norm": 3.168333090156813, "learning_rate": 3.7036827195467423e-06, "loss": 0.2961, "step": 3270 }, { "epoch": 0.3334565524977374, "loss_breakdown/lm_loss": 1.3090591892250814e-05, "loss_breakdown/pointer_loss": 0.2516951858997345, "step": 3270 }, { "epoch": 0.3334565524977374, "loss_breakdown/lm_loss": 1.243058704858413e-05, "loss_breakdown/pointer_loss": 0.2518901824951172, "step": 3270 }, { "epoch": 0.3334565524977374, "loss_breakdown/lm_loss": 1.5846140740904957e-05, "loss_breakdown/pointer_loss": 0.5603688359260559, "step": 3270 }, { "epoch": 0.3334565524977374, "loss_breakdown/lm_loss": 1.3437788766168524e-05, "loss_breakdown/pointer_loss": 0.8591450452804565, "step": 3270 }, { "epoch": 0.3334565524977374, "loss_breakdown/lm_loss": 1.0208086678176187e-05, "loss_breakdown/pointer_loss": 0.6217981576919556, "step": 3270 }, { "epoch": 0.3334565524977374, "loss_breakdown/lm_loss": 9.805384252103977e-06, "loss_breakdown/pointer_loss": 0.6766438484191895, "step": 3270 }, { "epoch": 0.3334565524977374, "loss_breakdown/lm_loss": 1.113497910409933e-05, "loss_breakdown/pointer_loss": 0.3727463483810425, "step": 3270 }, { "epoch": 0.3334565524977374, "loss_breakdown/lm_loss": 9.83768404694274e-06, "loss_breakdown/pointer_loss": 0.401140034198761, "step": 3270 }, { "epoch": 0.334476297306599, "grad_norm": 5.465810771317756, "learning_rate": 3.6980169971671392e-06, "loss": 0.2856, "step": 3280 }, { "epoch": 0.334476297306599, "loss_breakdown/lm_loss": 1.6382788089686073e-05, "loss_breakdown/pointer_loss": 0.41557976603507996, "step": 3280 }, { "epoch": 0.334476297306599, "loss_breakdown/lm_loss": 1.4626500160375144e-05, "loss_breakdown/pointer_loss": 0.11636988818645477, "step": 3280 }, { "epoch": 0.334476297306599, "loss_breakdown/lm_loss": 1.4968260984460358e-05, "loss_breakdown/pointer_loss": 0.2028127908706665, "step": 3280 }, { "epoch": 0.334476297306599, "loss_breakdown/lm_loss": 1.3724433301831596e-05, "loss_breakdown/pointer_loss": 1.0122735500335693, "step": 3280 }, { "epoch": 0.334476297306599, "loss_breakdown/lm_loss": 1.3804061381961219e-05, "loss_breakdown/pointer_loss": 0.11730930209159851, "step": 3280 }, { "epoch": 0.334476297306599, "loss_breakdown/lm_loss": 2.3844435418141074e-05, "loss_breakdown/pointer_loss": 0.25641757249832153, "step": 3280 }, { "epoch": 0.334476297306599, "loss_breakdown/lm_loss": 2.066574052150827e-05, "loss_breakdown/pointer_loss": 0.24564766883850098, "step": 3280 }, { "epoch": 0.334476297306599, "loss_breakdown/lm_loss": 2.3399035853799433e-05, "loss_breakdown/pointer_loss": 0.39577963948249817, "step": 3280 }, { "epoch": 0.3354960421154606, "grad_norm": 2.6566167369904155, "learning_rate": 3.6923512747875357e-06, "loss": 0.2872, "step": 3290 }, { "epoch": 0.3354960421154606, "loss_breakdown/lm_loss": 1.0280080459779128e-05, "loss_breakdown/pointer_loss": 0.4133656620979309, "step": 3290 }, { "epoch": 0.3354960421154606, "loss_breakdown/lm_loss": 1.333284944848856e-05, "loss_breakdown/pointer_loss": 0.13434462249279022, "step": 3290 }, { "epoch": 0.3354960421154606, "loss_breakdown/lm_loss": 1.1104104487458244e-05, "loss_breakdown/pointer_loss": 0.1825045943260193, "step": 3290 }, { "epoch": 0.3354960421154606, "loss_breakdown/lm_loss": 9.842465260589961e-06, "loss_breakdown/pointer_loss": 0.6215359568595886, "step": 3290 }, { "epoch": 0.3354960421154606, "loss_breakdown/lm_loss": 1.1969154911639635e-05, "loss_breakdown/pointer_loss": 0.11720702052116394, "step": 3290 }, { "epoch": 0.3354960421154606, "loss_breakdown/lm_loss": 1.0589548764983192e-05, "loss_breakdown/pointer_loss": 0.2680603861808777, "step": 3290 }, { "epoch": 0.3354960421154606, "loss_breakdown/lm_loss": 9.355381735076662e-06, "loss_breakdown/pointer_loss": 0.20341050624847412, "step": 3290 }, { "epoch": 0.3354960421154606, "loss_breakdown/lm_loss": 9.25502445170423e-06, "loss_breakdown/pointer_loss": 0.3063393533229828, "step": 3290 }, { "epoch": 0.3365157869243222, "grad_norm": 11.263869246753691, "learning_rate": 3.6866855524079327e-06, "loss": 0.2601, "step": 3300 }, { "epoch": 0.3365157869243222, "loss_breakdown/lm_loss": 7.354716217378154e-05, "loss_breakdown/pointer_loss": 1.5858454704284668, "step": 3300 }, { "epoch": 0.3365157869243222, "loss_breakdown/lm_loss": 4.0315229853149503e-05, "loss_breakdown/pointer_loss": 0.8997478485107422, "step": 3300 }, { "epoch": 0.3365157869243222, "loss_breakdown/lm_loss": 1.792557850421872e-05, "loss_breakdown/pointer_loss": 0.811111330986023, "step": 3300 }, { "epoch": 0.3365157869243222, "loss_breakdown/lm_loss": 1.8386876035947353e-05, "loss_breakdown/pointer_loss": 1.8072185516357422, "step": 3300 }, { "epoch": 0.3365157869243222, "loss_breakdown/lm_loss": 1.5599422113155015e-05, "loss_breakdown/pointer_loss": 1.1706788539886475, "step": 3300 }, { "epoch": 0.3365157869243222, "loss_breakdown/lm_loss": 2.7929443604080006e-05, "loss_breakdown/pointer_loss": 0.662381112575531, "step": 3300 }, { "epoch": 0.3365157869243222, "loss_breakdown/lm_loss": 1.4559339433617424e-05, "loss_breakdown/pointer_loss": 0.4953782558441162, "step": 3300 }, { "epoch": 0.3365157869243222, "loss_breakdown/lm_loss": 1.617040834389627e-05, "loss_breakdown/pointer_loss": 0.44644999504089355, "step": 3300 }, { "epoch": 0.3375355317331838, "grad_norm": 2.3776884866741144, "learning_rate": 3.681019830028329e-06, "loss": 0.2826, "step": 3310 }, { "epoch": 0.3375355317331838, "loss_breakdown/lm_loss": 1.3190714525990188e-05, "loss_breakdown/pointer_loss": 0.3518342673778534, "step": 3310 }, { "epoch": 0.3375355317331838, "loss_breakdown/lm_loss": 1.1040528988814913e-05, "loss_breakdown/pointer_loss": 0.1495923101902008, "step": 3310 }, { "epoch": 0.3375355317331838, "loss_breakdown/lm_loss": 1.0141844541067258e-05, "loss_breakdown/pointer_loss": 0.6930345296859741, "step": 3310 }, { "epoch": 0.3375355317331838, "loss_breakdown/lm_loss": 2.4523111278540455e-05, "loss_breakdown/pointer_loss": 0.1560339331626892, "step": 3310 }, { "epoch": 0.3375355317331838, "loss_breakdown/lm_loss": 9.658953786129132e-06, "loss_breakdown/pointer_loss": 0.2605575621128082, "step": 3310 }, { "epoch": 0.3375355317331838, "loss_breakdown/lm_loss": 1.9068738765781745e-05, "loss_breakdown/pointer_loss": 0.4022252559661865, "step": 3310 }, { "epoch": 0.3375355317331838, "loss_breakdown/lm_loss": 1.7078184100682847e-05, "loss_breakdown/pointer_loss": 0.47885435819625854, "step": 3310 }, { "epoch": 0.3375355317331838, "loss_breakdown/lm_loss": 1.54648059833562e-05, "loss_breakdown/pointer_loss": 2.975789785385132, "step": 3310 }, { "epoch": 0.33855527654204537, "grad_norm": 4.4467190866429815, "learning_rate": 3.6753541076487252e-06, "loss": 0.2991, "step": 3320 }, { "epoch": 0.33855527654204537, "loss_breakdown/lm_loss": 1.7445629055146128e-05, "loss_breakdown/pointer_loss": 0.35822224617004395, "step": 3320 }, { "epoch": 0.33855527654204537, "loss_breakdown/lm_loss": 2.437953298795037e-05, "loss_breakdown/pointer_loss": 0.19948239624500275, "step": 3320 }, { "epoch": 0.33855527654204537, "loss_breakdown/lm_loss": 1.2954908015672117e-05, "loss_breakdown/pointer_loss": 0.4137861132621765, "step": 3320 }, { "epoch": 0.33855527654204537, "loss_breakdown/lm_loss": 1.952749880729243e-05, "loss_breakdown/pointer_loss": 0.3758690357208252, "step": 3320 }, { "epoch": 0.33855527654204537, "loss_breakdown/lm_loss": 1.6308293197653256e-05, "loss_breakdown/pointer_loss": 0.8246052265167236, "step": 3320 }, { "epoch": 0.33855527654204537, "loss_breakdown/lm_loss": 1.6947431504377164e-05, "loss_breakdown/pointer_loss": 0.2857488989830017, "step": 3320 }, { "epoch": 0.33855527654204537, "loss_breakdown/lm_loss": 1.3253793440526351e-05, "loss_breakdown/pointer_loss": 0.29723337292671204, "step": 3320 }, { "epoch": 0.33855527654204537, "loss_breakdown/lm_loss": 1.3861298612027895e-05, "loss_breakdown/pointer_loss": 1.4027810096740723, "step": 3320 }, { "epoch": 0.33957502135090695, "grad_norm": 5.73961940013811, "learning_rate": 3.669688385269122e-06, "loss": 0.2963, "step": 3330 }, { "epoch": 0.33957502135090695, "loss_breakdown/lm_loss": 8.420018275501207e-06, "loss_breakdown/pointer_loss": 0.14541494846343994, "step": 3330 }, { "epoch": 0.33957502135090695, "loss_breakdown/lm_loss": 2.1536066924454644e-05, "loss_breakdown/pointer_loss": 0.18889537453651428, "step": 3330 }, { "epoch": 0.33957502135090695, "loss_breakdown/lm_loss": 1.4070295037527103e-05, "loss_breakdown/pointer_loss": 0.14732596278190613, "step": 3330 }, { "epoch": 0.33957502135090695, "loss_breakdown/lm_loss": 1.541319397801999e-05, "loss_breakdown/pointer_loss": 0.13589048385620117, "step": 3330 }, { "epoch": 0.33957502135090695, "loss_breakdown/lm_loss": 1.3740530448558275e-05, "loss_breakdown/pointer_loss": 2.823418617248535, "step": 3330 }, { "epoch": 0.33957502135090695, "loss_breakdown/lm_loss": 1.3720631613978185e-05, "loss_breakdown/pointer_loss": 0.11355309188365936, "step": 3330 }, { "epoch": 0.33957502135090695, "loss_breakdown/lm_loss": 1.4415957593882922e-05, "loss_breakdown/pointer_loss": 0.21214675903320312, "step": 3330 }, { "epoch": 0.33957502135090695, "loss_breakdown/lm_loss": 3.874192771036178e-05, "loss_breakdown/pointer_loss": 0.4353768825531006, "step": 3330 }, { "epoch": 0.3405947661597685, "grad_norm": 2.962357662504871, "learning_rate": 3.6640226628895187e-06, "loss": 0.3109, "step": 3340 }, { "epoch": 0.3405947661597685, "loss_breakdown/lm_loss": 1.4728363566973712e-05, "loss_breakdown/pointer_loss": 0.32942676544189453, "step": 3340 }, { "epoch": 0.3405947661597685, "loss_breakdown/lm_loss": 1.3235541700851172e-05, "loss_breakdown/pointer_loss": 0.8828117847442627, "step": 3340 }, { "epoch": 0.3405947661597685, "loss_breakdown/lm_loss": 1.0993914656864945e-05, "loss_breakdown/pointer_loss": 0.6726270914077759, "step": 3340 }, { "epoch": 0.3405947661597685, "loss_breakdown/lm_loss": 1.2437109944585245e-05, "loss_breakdown/pointer_loss": 0.2412889301776886, "step": 3340 }, { "epoch": 0.3405947661597685, "loss_breakdown/lm_loss": 1.0339886102883611e-05, "loss_breakdown/pointer_loss": 0.27251869440078735, "step": 3340 }, { "epoch": 0.3405947661597685, "loss_breakdown/lm_loss": 1.6116229744511656e-05, "loss_breakdown/pointer_loss": 0.1891527771949768, "step": 3340 }, { "epoch": 0.3405947661597685, "loss_breakdown/lm_loss": 1.5061402336868923e-05, "loss_breakdown/pointer_loss": 0.7858256697654724, "step": 3340 }, { "epoch": 0.3405947661597685, "loss_breakdown/lm_loss": 1.1442827599239536e-05, "loss_breakdown/pointer_loss": 0.7932205200195312, "step": 3340 }, { "epoch": 0.3416145109686301, "grad_norm": 10.369007317096177, "learning_rate": 3.658356940509915e-06, "loss": 0.296, "step": 3350 }, { "epoch": 0.3416145109686301, "loss_breakdown/lm_loss": 5.6207172747235745e-05, "loss_breakdown/pointer_loss": 2.1969048976898193, "step": 3350 }, { "epoch": 0.3416145109686301, "loss_breakdown/lm_loss": 4.055099270772189e-05, "loss_breakdown/pointer_loss": 0.7362073063850403, "step": 3350 }, { "epoch": 0.3416145109686301, "loss_breakdown/lm_loss": 3.4810949728125706e-05, "loss_breakdown/pointer_loss": 0.39318954944610596, "step": 3350 }, { "epoch": 0.3416145109686301, "loss_breakdown/lm_loss": 2.1616811864078045e-05, "loss_breakdown/pointer_loss": 0.7944254875183105, "step": 3350 }, { "epoch": 0.3416145109686301, "loss_breakdown/lm_loss": 2.5394891054020263e-05, "loss_breakdown/pointer_loss": 1.4951355457305908, "step": 3350 }, { "epoch": 0.3416145109686301, "loss_breakdown/lm_loss": 1.427566439815564e-05, "loss_breakdown/pointer_loss": 0.5621761083602905, "step": 3350 }, { "epoch": 0.3416145109686301, "loss_breakdown/lm_loss": 1.4191322406986728e-05, "loss_breakdown/pointer_loss": 0.9513798952102661, "step": 3350 }, { "epoch": 0.3416145109686301, "loss_breakdown/lm_loss": 2.01959082914982e-05, "loss_breakdown/pointer_loss": 0.3892335295677185, "step": 3350 }, { "epoch": 0.3426342557774917, "grad_norm": 13.4672026552567, "learning_rate": 3.652691218130312e-06, "loss": 0.2807, "step": 3360 }, { "epoch": 0.3426342557774917, "loss_breakdown/lm_loss": 1.2896137377538253e-05, "loss_breakdown/pointer_loss": 0.5866482257843018, "step": 3360 }, { "epoch": 0.3426342557774917, "loss_breakdown/lm_loss": 1.2562348274514079e-05, "loss_breakdown/pointer_loss": 0.13553044199943542, "step": 3360 }, { "epoch": 0.3426342557774917, "loss_breakdown/lm_loss": 1.376037562295096e-05, "loss_breakdown/pointer_loss": 0.28657811880111694, "step": 3360 }, { "epoch": 0.3426342557774917, "loss_breakdown/lm_loss": 1.778260775608942e-05, "loss_breakdown/pointer_loss": 0.2303609997034073, "step": 3360 }, { "epoch": 0.3426342557774917, "loss_breakdown/lm_loss": 1.7828417185228318e-05, "loss_breakdown/pointer_loss": 0.8149817585945129, "step": 3360 }, { "epoch": 0.3426342557774917, "loss_breakdown/lm_loss": 2.4193685021600686e-05, "loss_breakdown/pointer_loss": 0.48875099420547485, "step": 3360 }, { "epoch": 0.3426342557774917, "loss_breakdown/lm_loss": 2.753551962086931e-05, "loss_breakdown/pointer_loss": 0.1209418922662735, "step": 3360 }, { "epoch": 0.3426342557774917, "loss_breakdown/lm_loss": 1.3533701348933391e-05, "loss_breakdown/pointer_loss": 0.39430397748947144, "step": 3360 }, { "epoch": 0.34365400058635326, "grad_norm": 3.2095498598874, "learning_rate": 3.6470254957507086e-06, "loss": 0.3033, "step": 3370 }, { "epoch": 0.34365400058635326, "loss_breakdown/lm_loss": 1.0879165529331658e-05, "loss_breakdown/pointer_loss": 0.42685309052467346, "step": 3370 }, { "epoch": 0.34365400058635326, "loss_breakdown/lm_loss": 1.3466651580529287e-05, "loss_breakdown/pointer_loss": 0.9339566230773926, "step": 3370 }, { "epoch": 0.34365400058635326, "loss_breakdown/lm_loss": 1.284940299228765e-05, "loss_breakdown/pointer_loss": 0.1702633500099182, "step": 3370 }, { "epoch": 0.34365400058635326, "loss_breakdown/lm_loss": 1.3810345080855768e-05, "loss_breakdown/pointer_loss": 0.8035539388656616, "step": 3370 }, { "epoch": 0.34365400058635326, "loss_breakdown/lm_loss": 1.1594923307711724e-05, "loss_breakdown/pointer_loss": 0.38857007026672363, "step": 3370 }, { "epoch": 0.34365400058635326, "loss_breakdown/lm_loss": 1.40281254061847e-05, "loss_breakdown/pointer_loss": 0.2863616943359375, "step": 3370 }, { "epoch": 0.34365400058635326, "loss_breakdown/lm_loss": 1.3696856512979139e-05, "loss_breakdown/pointer_loss": 0.6222898960113525, "step": 3370 }, { "epoch": 0.34365400058635326, "loss_breakdown/lm_loss": 1.043229531205725e-05, "loss_breakdown/pointer_loss": 0.7760269045829773, "step": 3370 }, { "epoch": 0.34467374539521484, "grad_norm": 13.981680706088083, "learning_rate": 3.6413597733711055e-06, "loss": 0.2861, "step": 3380 }, { "epoch": 0.34467374539521484, "loss_breakdown/lm_loss": 1.434439946024213e-05, "loss_breakdown/pointer_loss": 0.11817726492881775, "step": 3380 }, { "epoch": 0.34467374539521484, "loss_breakdown/lm_loss": 1.1356429240549915e-05, "loss_breakdown/pointer_loss": 2.0919723510742188, "step": 3380 }, { "epoch": 0.34467374539521484, "loss_breakdown/lm_loss": 9.695515473140404e-06, "loss_breakdown/pointer_loss": 0.1799524873495102, "step": 3380 }, { "epoch": 0.34467374539521484, "loss_breakdown/lm_loss": 9.572333510732278e-06, "loss_breakdown/pointer_loss": 0.20569173991680145, "step": 3380 }, { "epoch": 0.34467374539521484, "loss_breakdown/lm_loss": 1.7605938410270028e-05, "loss_breakdown/pointer_loss": 1.4515962600708008, "step": 3380 }, { "epoch": 0.34467374539521484, "loss_breakdown/lm_loss": 9.631947250454687e-06, "loss_breakdown/pointer_loss": 0.20620298385620117, "step": 3380 }, { "epoch": 0.34467374539521484, "loss_breakdown/lm_loss": 1.1094166438851971e-05, "loss_breakdown/pointer_loss": 0.15119719505310059, "step": 3380 }, { "epoch": 0.34467374539521484, "loss_breakdown/lm_loss": 2.0336045054136775e-05, "loss_breakdown/pointer_loss": 0.11323484778404236, "step": 3380 }, { "epoch": 0.3456934902040764, "grad_norm": 6.325986204138663, "learning_rate": 3.6356940509915016e-06, "loss": 0.3009, "step": 3390 }, { "epoch": 0.3456934902040764, "loss_breakdown/lm_loss": 8.516464731656015e-06, "loss_breakdown/pointer_loss": 0.2717125117778778, "step": 3390 }, { "epoch": 0.3456934902040764, "loss_breakdown/lm_loss": 9.312932888860814e-06, "loss_breakdown/pointer_loss": 0.14939342439174652, "step": 3390 }, { "epoch": 0.3456934902040764, "loss_breakdown/lm_loss": 1.2665682334045414e-05, "loss_breakdown/pointer_loss": 0.8037331104278564, "step": 3390 }, { "epoch": 0.3456934902040764, "loss_breakdown/lm_loss": 1.3793690413876902e-05, "loss_breakdown/pointer_loss": 0.3217118978500366, "step": 3390 }, { "epoch": 0.3456934902040764, "loss_breakdown/lm_loss": 1.2414164302754216e-05, "loss_breakdown/pointer_loss": 0.09216506034135818, "step": 3390 }, { "epoch": 0.3456934902040764, "loss_breakdown/lm_loss": 9.639887139201164e-06, "loss_breakdown/pointer_loss": 0.45243898034095764, "step": 3390 }, { "epoch": 0.3456934902040764, "loss_breakdown/lm_loss": 9.516139471088536e-06, "loss_breakdown/pointer_loss": 0.28674018383026123, "step": 3390 }, { "epoch": 0.3456934902040764, "loss_breakdown/lm_loss": 9.779384527064394e-06, "loss_breakdown/pointer_loss": 0.7181397080421448, "step": 3390 }, { "epoch": 0.346713235012938, "grad_norm": 8.622051281829096, "learning_rate": 3.630028328611898e-06, "loss": 0.3052, "step": 3400 }, { "epoch": 0.346713235012938, "loss_breakdown/lm_loss": 4.8642661568010226e-05, "loss_breakdown/pointer_loss": 1.6517236232757568, "step": 3400 }, { "epoch": 0.346713235012938, "loss_breakdown/lm_loss": 1.9775305190705694e-05, "loss_breakdown/pointer_loss": 1.0769832134246826, "step": 3400 }, { "epoch": 0.346713235012938, "loss_breakdown/lm_loss": 1.9128496205667034e-05, "loss_breakdown/pointer_loss": 0.9171723127365112, "step": 3400 }, { "epoch": 0.346713235012938, "loss_breakdown/lm_loss": 1.368396533507621e-05, "loss_breakdown/pointer_loss": 0.7348681092262268, "step": 3400 }, { "epoch": 0.346713235012938, "loss_breakdown/lm_loss": 1.835313014453277e-05, "loss_breakdown/pointer_loss": 0.638432502746582, "step": 3400 }, { "epoch": 0.346713235012938, "loss_breakdown/lm_loss": 1.487224562879419e-05, "loss_breakdown/pointer_loss": 0.5352475047111511, "step": 3400 }, { "epoch": 0.346713235012938, "loss_breakdown/lm_loss": 1.2709778275166173e-05, "loss_breakdown/pointer_loss": 0.21155469119548798, "step": 3400 }, { "epoch": 0.346713235012938, "loss_breakdown/lm_loss": 1.5476787666557357e-05, "loss_breakdown/pointer_loss": 0.1549181044101715, "step": 3400 }, { "epoch": 0.34773297982179957, "grad_norm": 2.9251069942623396, "learning_rate": 3.624362606232295e-06, "loss": 0.2763, "step": 3410 }, { "epoch": 0.34773297982179957, "loss_breakdown/lm_loss": 8.447825166513212e-06, "loss_breakdown/pointer_loss": 0.27123260498046875, "step": 3410 }, { "epoch": 0.34773297982179957, "loss_breakdown/lm_loss": 8.707803317520302e-06, "loss_breakdown/pointer_loss": 0.14887376129627228, "step": 3410 }, { "epoch": 0.34773297982179957, "loss_breakdown/lm_loss": 1.1258393897151109e-05, "loss_breakdown/pointer_loss": 0.7264922857284546, "step": 3410 }, { "epoch": 0.34773297982179957, "loss_breakdown/lm_loss": 1.1300759069854394e-05, "loss_breakdown/pointer_loss": 0.44301167130470276, "step": 3410 }, { "epoch": 0.34773297982179957, "loss_breakdown/lm_loss": 7.837908924557269e-06, "loss_breakdown/pointer_loss": 1.0865777730941772, "step": 3410 }, { "epoch": 0.34773297982179957, "loss_breakdown/lm_loss": 9.353784662380349e-06, "loss_breakdown/pointer_loss": 0.0982862263917923, "step": 3410 }, { "epoch": 0.34773297982179957, "loss_breakdown/lm_loss": 1.0688901056710165e-05, "loss_breakdown/pointer_loss": 0.2498493492603302, "step": 3410 }, { "epoch": 0.34773297982179957, "loss_breakdown/lm_loss": 9.957772817870136e-06, "loss_breakdown/pointer_loss": 0.2073110044002533, "step": 3410 }, { "epoch": 0.3487527246306612, "grad_norm": 3.7442063839358837, "learning_rate": 3.6186968838526915e-06, "loss": 0.3137, "step": 3420 }, { "epoch": 0.3487527246306612, "loss_breakdown/lm_loss": 2.1752968677901663e-05, "loss_breakdown/pointer_loss": 0.5683854222297668, "step": 3420 }, { "epoch": 0.3487527246306612, "loss_breakdown/lm_loss": 1.2658768355322536e-05, "loss_breakdown/pointer_loss": 1.5174493789672852, "step": 3420 }, { "epoch": 0.3487527246306612, "loss_breakdown/lm_loss": 9.748049706104212e-06, "loss_breakdown/pointer_loss": 0.36361920833587646, "step": 3420 }, { "epoch": 0.3487527246306612, "loss_breakdown/lm_loss": 1.3022333405388054e-05, "loss_breakdown/pointer_loss": 0.42232322692871094, "step": 3420 }, { "epoch": 0.3487527246306612, "loss_breakdown/lm_loss": 1.0535777619224973e-05, "loss_breakdown/pointer_loss": 0.21649275720119476, "step": 3420 }, { "epoch": 0.3487527246306612, "loss_breakdown/lm_loss": 1.0177089279750362e-05, "loss_breakdown/pointer_loss": 0.24677139520645142, "step": 3420 }, { "epoch": 0.3487527246306612, "loss_breakdown/lm_loss": 1.2158365279901773e-05, "loss_breakdown/pointer_loss": 0.6851934194564819, "step": 3420 }, { "epoch": 0.3487527246306612, "loss_breakdown/lm_loss": 8.564668860344682e-06, "loss_breakdown/pointer_loss": 0.2689877152442932, "step": 3420 }, { "epoch": 0.3497724694395228, "grad_norm": 9.485625500484815, "learning_rate": 3.613031161473088e-06, "loss": 0.2863, "step": 3430 }, { "epoch": 0.3497724694395228, "loss_breakdown/lm_loss": 1.88859885383863e-05, "loss_breakdown/pointer_loss": 0.37279245257377625, "step": 3430 }, { "epoch": 0.3497724694395228, "loss_breakdown/lm_loss": 1.2321977919782512e-05, "loss_breakdown/pointer_loss": 0.1993255615234375, "step": 3430 }, { "epoch": 0.3497724694395228, "loss_breakdown/lm_loss": 1.029948271025205e-05, "loss_breakdown/pointer_loss": 0.5646737813949585, "step": 3430 }, { "epoch": 0.3497724694395228, "loss_breakdown/lm_loss": 1.1098145478172228e-05, "loss_breakdown/pointer_loss": 2.6449692249298096, "step": 3430 }, { "epoch": 0.3497724694395228, "loss_breakdown/lm_loss": 1.1519355211930815e-05, "loss_breakdown/pointer_loss": 0.21684929728507996, "step": 3430 }, { "epoch": 0.3497724694395228, "loss_breakdown/lm_loss": 1.0990883311023936e-05, "loss_breakdown/pointer_loss": 0.03585967421531677, "step": 3430 }, { "epoch": 0.3497724694395228, "loss_breakdown/lm_loss": 1.250477089342894e-05, "loss_breakdown/pointer_loss": 0.33441945910453796, "step": 3430 }, { "epoch": 0.3497724694395228, "loss_breakdown/lm_loss": 1.1181598893017508e-05, "loss_breakdown/pointer_loss": 3.0608012676239014, "step": 3430 }, { "epoch": 0.35079221424838436, "grad_norm": 3.748119617159533, "learning_rate": 3.607365439093485e-06, "loss": 0.2807, "step": 3440 }, { "epoch": 0.35079221424838436, "loss_breakdown/lm_loss": 1.1778362022596411e-05, "loss_breakdown/pointer_loss": 0.43874475359916687, "step": 3440 }, { "epoch": 0.35079221424838436, "loss_breakdown/lm_loss": 9.368982318846975e-06, "loss_breakdown/pointer_loss": 0.5961276888847351, "step": 3440 }, { "epoch": 0.35079221424838436, "loss_breakdown/lm_loss": 1.2122523003199603e-05, "loss_breakdown/pointer_loss": 0.6420050859451294, "step": 3440 }, { "epoch": 0.35079221424838436, "loss_breakdown/lm_loss": 9.790868716663681e-06, "loss_breakdown/pointer_loss": 0.3239726424217224, "step": 3440 }, { "epoch": 0.35079221424838436, "loss_breakdown/lm_loss": 9.749559467309155e-06, "loss_breakdown/pointer_loss": 0.4176396131515503, "step": 3440 }, { "epoch": 0.35079221424838436, "loss_breakdown/lm_loss": 1.33642652144772e-05, "loss_breakdown/pointer_loss": 0.6822003126144409, "step": 3440 }, { "epoch": 0.35079221424838436, "loss_breakdown/lm_loss": 3.8760994357289746e-05, "loss_breakdown/pointer_loss": 0.24237611889839172, "step": 3440 }, { "epoch": 0.35079221424838436, "loss_breakdown/lm_loss": 1.2822523785871454e-05, "loss_breakdown/pointer_loss": 0.2504046857357025, "step": 3440 }, { "epoch": 0.35181195905724594, "grad_norm": 49.71495195596012, "learning_rate": 3.601699716713881e-06, "loss": 0.2747, "step": 3450 }, { "epoch": 0.35181195905724594, "loss_breakdown/lm_loss": 7.937685586512089e-05, "loss_breakdown/pointer_loss": 3.2106080055236816, "step": 3450 }, { "epoch": 0.35181195905724594, "loss_breakdown/lm_loss": 3.5623910662252456e-05, "loss_breakdown/pointer_loss": 0.9180306196212769, "step": 3450 }, { "epoch": 0.35181195905724594, "loss_breakdown/lm_loss": 2.3984215658856556e-05, "loss_breakdown/pointer_loss": 1.1126031875610352, "step": 3450 }, { "epoch": 0.35181195905724594, "loss_breakdown/lm_loss": 1.772117684595287e-05, "loss_breakdown/pointer_loss": 0.43167179822921753, "step": 3450 }, { "epoch": 0.35181195905724594, "loss_breakdown/lm_loss": 3.20213075610809e-05, "loss_breakdown/pointer_loss": 0.8374413251876831, "step": 3450 }, { "epoch": 0.35181195905724594, "loss_breakdown/lm_loss": 3.888136416207999e-05, "loss_breakdown/pointer_loss": 1.7372514009475708, "step": 3450 }, { "epoch": 0.35181195905724594, "loss_breakdown/lm_loss": 1.4644292605225928e-05, "loss_breakdown/pointer_loss": 0.6602667570114136, "step": 3450 }, { "epoch": 0.35181195905724594, "loss_breakdown/lm_loss": 4.027093018521555e-05, "loss_breakdown/pointer_loss": 0.4859762191772461, "step": 3450 }, { "epoch": 0.3528317038661075, "grad_norm": 3.2641311432329068, "learning_rate": 3.596033994334278e-06, "loss": 0.2709, "step": 3460 }, { "epoch": 0.3528317038661075, "loss_breakdown/lm_loss": 1.70090606843587e-05, "loss_breakdown/pointer_loss": 0.43555334210395813, "step": 3460 }, { "epoch": 0.3528317038661075, "loss_breakdown/lm_loss": 1.4977725186327007e-05, "loss_breakdown/pointer_loss": 0.12654177844524384, "step": 3460 }, { "epoch": 0.3528317038661075, "loss_breakdown/lm_loss": 1.1064373211411294e-05, "loss_breakdown/pointer_loss": 0.26195117831230164, "step": 3460 }, { "epoch": 0.3528317038661075, "loss_breakdown/lm_loss": 1.332023566646967e-05, "loss_breakdown/pointer_loss": 0.26114529371261597, "step": 3460 }, { "epoch": 0.3528317038661075, "loss_breakdown/lm_loss": 1.8839415133697912e-05, "loss_breakdown/pointer_loss": 0.18751974403858185, "step": 3460 }, { "epoch": 0.3528317038661075, "loss_breakdown/lm_loss": 9.620011041988619e-06, "loss_breakdown/pointer_loss": 0.3459431529045105, "step": 3460 }, { "epoch": 0.3528317038661075, "loss_breakdown/lm_loss": 1.9989729480585083e-05, "loss_breakdown/pointer_loss": 0.6714299321174622, "step": 3460 }, { "epoch": 0.3528317038661075, "loss_breakdown/lm_loss": 1.0617382031341549e-05, "loss_breakdown/pointer_loss": 0.16121703386306763, "step": 3460 }, { "epoch": 0.3538514486749691, "grad_norm": 4.925756513481822, "learning_rate": 3.5903682719546745e-06, "loss": 0.3036, "step": 3470 }, { "epoch": 0.3538514486749691, "loss_breakdown/lm_loss": 1.1664385965559632e-05, "loss_breakdown/pointer_loss": 1.0285831689834595, "step": 3470 }, { "epoch": 0.3538514486749691, "loss_breakdown/lm_loss": 1.3068093721813057e-05, "loss_breakdown/pointer_loss": 0.8295620083808899, "step": 3470 }, { "epoch": 0.3538514486749691, "loss_breakdown/lm_loss": 9.140074325841852e-06, "loss_breakdown/pointer_loss": 0.7003955841064453, "step": 3470 }, { "epoch": 0.3538514486749691, "loss_breakdown/lm_loss": 9.097886504605412e-06, "loss_breakdown/pointer_loss": 0.7477872967720032, "step": 3470 }, { "epoch": 0.3538514486749691, "loss_breakdown/lm_loss": 1.130332202592399e-05, "loss_breakdown/pointer_loss": 0.752407431602478, "step": 3470 }, { "epoch": 0.3538514486749691, "loss_breakdown/lm_loss": 1.0034837941930164e-05, "loss_breakdown/pointer_loss": 0.17772909998893738, "step": 3470 }, { "epoch": 0.3538514486749691, "loss_breakdown/lm_loss": 1.5164001524681225e-05, "loss_breakdown/pointer_loss": 0.3274163007736206, "step": 3470 }, { "epoch": 0.3538514486749691, "loss_breakdown/lm_loss": 8.871087629813701e-06, "loss_breakdown/pointer_loss": 0.5811635851860046, "step": 3470 }, { "epoch": 0.35487119348383067, "grad_norm": 5.06547630231726, "learning_rate": 3.584702549575071e-06, "loss": 0.2921, "step": 3480 }, { "epoch": 0.35487119348383067, "loss_breakdown/lm_loss": 9.095527275349014e-06, "loss_breakdown/pointer_loss": 0.32962384819984436, "step": 3480 }, { "epoch": 0.35487119348383067, "loss_breakdown/lm_loss": 1.2031925507471897e-05, "loss_breakdown/pointer_loss": 0.28770673274993896, "step": 3480 }, { "epoch": 0.35487119348383067, "loss_breakdown/lm_loss": 8.0107511166716e-06, "loss_breakdown/pointer_loss": 0.08518677949905396, "step": 3480 }, { "epoch": 0.35487119348383067, "loss_breakdown/lm_loss": 1.0458398719492834e-05, "loss_breakdown/pointer_loss": 0.15276813507080078, "step": 3480 }, { "epoch": 0.35487119348383067, "loss_breakdown/lm_loss": 1.0080950232804753e-05, "loss_breakdown/pointer_loss": 0.15172092616558075, "step": 3480 }, { "epoch": 0.35487119348383067, "loss_breakdown/lm_loss": 1.242918187926989e-05, "loss_breakdown/pointer_loss": 0.26444947719573975, "step": 3480 }, { "epoch": 0.35487119348383067, "loss_breakdown/lm_loss": 1.0053151527245063e-05, "loss_breakdown/pointer_loss": 0.4708520770072937, "step": 3480 }, { "epoch": 0.35487119348383067, "loss_breakdown/lm_loss": 1.0013402061304078e-05, "loss_breakdown/pointer_loss": 3.6596710681915283, "step": 3480 }, { "epoch": 0.35589093829269225, "grad_norm": 3.0397639488643473, "learning_rate": 3.579036827195468e-06, "loss": 0.3037, "step": 3490 }, { "epoch": 0.35589093829269225, "loss_breakdown/lm_loss": 1.0933554221992381e-05, "loss_breakdown/pointer_loss": 0.32754409313201904, "step": 3490 }, { "epoch": 0.35589093829269225, "loss_breakdown/lm_loss": 9.02003375813365e-06, "loss_breakdown/pointer_loss": 1.4162548780441284, "step": 3490 }, { "epoch": 0.35589093829269225, "loss_breakdown/lm_loss": 9.269820111512672e-06, "loss_breakdown/pointer_loss": 0.16628508269786835, "step": 3490 }, { "epoch": 0.35589093829269225, "loss_breakdown/lm_loss": 8.572315891797189e-06, "loss_breakdown/pointer_loss": 0.32420605421066284, "step": 3490 }, { "epoch": 0.35589093829269225, "loss_breakdown/lm_loss": 7.463064321200363e-06, "loss_breakdown/pointer_loss": 0.35652923583984375, "step": 3490 }, { "epoch": 0.35589093829269225, "loss_breakdown/lm_loss": 9.352300367027055e-06, "loss_breakdown/pointer_loss": 0.2206801176071167, "step": 3490 }, { "epoch": 0.35589093829269225, "loss_breakdown/lm_loss": 8.946740308601875e-06, "loss_breakdown/pointer_loss": 0.49711740016937256, "step": 3490 }, { "epoch": 0.35589093829269225, "loss_breakdown/lm_loss": 7.728067430434749e-06, "loss_breakdown/pointer_loss": 0.09902982413768768, "step": 3490 }, { "epoch": 0.3569106831015538, "grad_norm": 11.29469859218336, "learning_rate": 3.5733711048158644e-06, "loss": 0.2786, "step": 3500 }, { "epoch": 0.3569106831015538, "loss_breakdown/lm_loss": 4.669236659538001e-05, "loss_breakdown/pointer_loss": 1.028059482574463, "step": 3500 }, { "epoch": 0.3569106831015538, "loss_breakdown/lm_loss": 3.299208401585929e-05, "loss_breakdown/pointer_loss": 1.2463817596435547, "step": 3500 }, { "epoch": 0.3569106831015538, "loss_breakdown/lm_loss": 1.84862146852538e-05, "loss_breakdown/pointer_loss": 0.4877394437789917, "step": 3500 }, { "epoch": 0.3569106831015538, "loss_breakdown/lm_loss": 1.6672060155542567e-05, "loss_breakdown/pointer_loss": 0.4530068039894104, "step": 3500 }, { "epoch": 0.3569106831015538, "loss_breakdown/lm_loss": 1.6487823813804425e-05, "loss_breakdown/pointer_loss": 0.7206246852874756, "step": 3500 }, { "epoch": 0.3569106831015538, "loss_breakdown/lm_loss": 1.7668922737357207e-05, "loss_breakdown/pointer_loss": 0.5071640014648438, "step": 3500 }, { "epoch": 0.3569106831015538, "loss_breakdown/lm_loss": 1.3049500921624713e-05, "loss_breakdown/pointer_loss": 0.4061562120914459, "step": 3500 }, { "epoch": 0.3569106831015538, "loss_breakdown/lm_loss": 1.4459270460065454e-05, "loss_breakdown/pointer_loss": 0.2596946954727173, "step": 3500 }, { "epoch": 0.3579304279104154, "grad_norm": 8.301304709845066, "learning_rate": 3.5677053824362613e-06, "loss": 0.2764, "step": 3510 }, { "epoch": 0.3579304279104154, "loss_breakdown/lm_loss": 1.258461270481348e-05, "loss_breakdown/pointer_loss": 0.20514827966690063, "step": 3510 }, { "epoch": 0.3579304279104154, "loss_breakdown/lm_loss": 1.0469704648130573e-05, "loss_breakdown/pointer_loss": 0.19455647468566895, "step": 3510 }, { "epoch": 0.3579304279104154, "loss_breakdown/lm_loss": 1.0667674359865487e-05, "loss_breakdown/pointer_loss": 0.5716658234596252, "step": 3510 }, { "epoch": 0.3579304279104154, "loss_breakdown/lm_loss": 9.167043572233524e-06, "loss_breakdown/pointer_loss": 0.05137229710817337, "step": 3510 }, { "epoch": 0.3579304279104154, "loss_breakdown/lm_loss": 1.1809384886873886e-05, "loss_breakdown/pointer_loss": 0.14537313580513, "step": 3510 }, { "epoch": 0.3579304279104154, "loss_breakdown/lm_loss": 4.0453476685797796e-05, "loss_breakdown/pointer_loss": 0.2507562041282654, "step": 3510 }, { "epoch": 0.3579304279104154, "loss_breakdown/lm_loss": 1.3051014320808463e-05, "loss_breakdown/pointer_loss": 1.139352560043335, "step": 3510 }, { "epoch": 0.3579304279104154, "loss_breakdown/lm_loss": 9.332589797850233e-06, "loss_breakdown/pointer_loss": 0.10723745822906494, "step": 3510 }, { "epoch": 0.358950172719277, "grad_norm": 5.666696245391683, "learning_rate": 3.5620396600566574e-06, "loss": 0.308, "step": 3520 }, { "epoch": 0.358950172719277, "loss_breakdown/lm_loss": 1.2461800906748977e-05, "loss_breakdown/pointer_loss": 0.3200288414955139, "step": 3520 }, { "epoch": 0.358950172719277, "loss_breakdown/lm_loss": 1.3880855476600118e-05, "loss_breakdown/pointer_loss": 0.2182842195034027, "step": 3520 }, { "epoch": 0.358950172719277, "loss_breakdown/lm_loss": 1.1523239663802087e-05, "loss_breakdown/pointer_loss": 0.3836464285850525, "step": 3520 }, { "epoch": 0.358950172719277, "loss_breakdown/lm_loss": 2.8551001378218643e-05, "loss_breakdown/pointer_loss": 0.9067814946174622, "step": 3520 }, { "epoch": 0.358950172719277, "loss_breakdown/lm_loss": 1.3159560694475658e-05, "loss_breakdown/pointer_loss": 0.11211033910512924, "step": 3520 }, { "epoch": 0.358950172719277, "loss_breakdown/lm_loss": 9.378089089295827e-06, "loss_breakdown/pointer_loss": 0.3321828246116638, "step": 3520 }, { "epoch": 0.358950172719277, "loss_breakdown/lm_loss": 1.2027008779114112e-05, "loss_breakdown/pointer_loss": 0.5080714225769043, "step": 3520 }, { "epoch": 0.358950172719277, "loss_breakdown/lm_loss": 1.155826248577796e-05, "loss_breakdown/pointer_loss": 0.3505004048347473, "step": 3520 }, { "epoch": 0.35996991752813856, "grad_norm": 17.455706899576416, "learning_rate": 3.556373937677054e-06, "loss": 0.2725, "step": 3530 }, { "epoch": 0.35996991752813856, "loss_breakdown/lm_loss": 1.1487563824630342e-05, "loss_breakdown/pointer_loss": 0.16860532760620117, "step": 3530 }, { "epoch": 0.35996991752813856, "loss_breakdown/lm_loss": 7.94718198449118e-06, "loss_breakdown/pointer_loss": 0.050415679812431335, "step": 3530 }, { "epoch": 0.35996991752813856, "loss_breakdown/lm_loss": 1.3490174751495942e-05, "loss_breakdown/pointer_loss": 0.15773466229438782, "step": 3530 }, { "epoch": 0.35996991752813856, "loss_breakdown/lm_loss": 1.1702124538714997e-05, "loss_breakdown/pointer_loss": 0.33417195081710815, "step": 3530 }, { "epoch": 0.35996991752813856, "loss_breakdown/lm_loss": 9.87826661003055e-06, "loss_breakdown/pointer_loss": 0.28612858057022095, "step": 3530 }, { "epoch": 0.35996991752813856, "loss_breakdown/lm_loss": 1.1793416888394859e-05, "loss_breakdown/pointer_loss": 0.11606796830892563, "step": 3530 }, { "epoch": 0.35996991752813856, "loss_breakdown/lm_loss": 1.1690190149238333e-05, "loss_breakdown/pointer_loss": 0.41406548023223877, "step": 3530 }, { "epoch": 0.35996991752813856, "loss_breakdown/lm_loss": 1.3378889889281709e-05, "loss_breakdown/pointer_loss": 0.5286386013031006, "step": 3530 }, { "epoch": 0.36098966233700014, "grad_norm": 5.477341119394419, "learning_rate": 3.550708215297451e-06, "loss": 0.2792, "step": 3540 }, { "epoch": 0.36098966233700014, "loss_breakdown/lm_loss": 1.1509401701914612e-05, "loss_breakdown/pointer_loss": 0.27270299196243286, "step": 3540 }, { "epoch": 0.36098966233700014, "loss_breakdown/lm_loss": 1.1616701158345677e-05, "loss_breakdown/pointer_loss": 0.3574286103248596, "step": 3540 }, { "epoch": 0.36098966233700014, "loss_breakdown/lm_loss": 1.1500060281832702e-05, "loss_breakdown/pointer_loss": 0.13349765539169312, "step": 3540 }, { "epoch": 0.36098966233700014, "loss_breakdown/lm_loss": 9.753407539392356e-06, "loss_breakdown/pointer_loss": 0.32065334916114807, "step": 3540 }, { "epoch": 0.36098966233700014, "loss_breakdown/lm_loss": 1.636493470869027e-05, "loss_breakdown/pointer_loss": 0.5278003811836243, "step": 3540 }, { "epoch": 0.36098966233700014, "loss_breakdown/lm_loss": 9.47877015278209e-06, "loss_breakdown/pointer_loss": 0.33376356959342957, "step": 3540 }, { "epoch": 0.36098966233700014, "loss_breakdown/lm_loss": 9.503048204351217e-06, "loss_breakdown/pointer_loss": 0.19883981347084045, "step": 3540 }, { "epoch": 0.36098966233700014, "loss_breakdown/lm_loss": 2.954309093183838e-05, "loss_breakdown/pointer_loss": 0.41268390417099, "step": 3540 }, { "epoch": 0.3620094071458618, "grad_norm": 14.76584594203905, "learning_rate": 3.5450424929178474e-06, "loss": 0.2945, "step": 3550 }, { "epoch": 0.3620094071458618, "loss_breakdown/lm_loss": 4.4630960474023595e-05, "loss_breakdown/pointer_loss": 2.957806348800659, "step": 3550 }, { "epoch": 0.3620094071458618, "loss_breakdown/lm_loss": 4.247308970661834e-05, "loss_breakdown/pointer_loss": 0.5359294414520264, "step": 3550 }, { "epoch": 0.3620094071458618, "loss_breakdown/lm_loss": 2.111964568030089e-05, "loss_breakdown/pointer_loss": 1.190661907196045, "step": 3550 }, { "epoch": 0.3620094071458618, "loss_breakdown/lm_loss": 2.3168651750893332e-05, "loss_breakdown/pointer_loss": 0.42795330286026, "step": 3550 }, { "epoch": 0.3620094071458618, "loss_breakdown/lm_loss": 2.4155006030923687e-05, "loss_breakdown/pointer_loss": 1.248705506324768, "step": 3550 }, { "epoch": 0.3620094071458618, "loss_breakdown/lm_loss": 1.2612395039468538e-05, "loss_breakdown/pointer_loss": 0.4138617515563965, "step": 3550 }, { "epoch": 0.3620094071458618, "loss_breakdown/lm_loss": 1.4348355762194842e-05, "loss_breakdown/pointer_loss": 0.6389325261116028, "step": 3550 }, { "epoch": 0.3620094071458618, "loss_breakdown/lm_loss": 1.4305919648904819e-05, "loss_breakdown/pointer_loss": 0.4403739869594574, "step": 3550 }, { "epoch": 0.36302915195472335, "grad_norm": 3.3314574426266783, "learning_rate": 3.539376770538244e-06, "loss": 0.2702, "step": 3560 }, { "epoch": 0.36302915195472335, "loss_breakdown/lm_loss": 8.694172720424831e-06, "loss_breakdown/pointer_loss": 0.07727047801017761, "step": 3560 }, { "epoch": 0.36302915195472335, "loss_breakdown/lm_loss": 1.282792891288409e-05, "loss_breakdown/pointer_loss": 0.3934210240840912, "step": 3560 }, { "epoch": 0.36302915195472335, "loss_breakdown/lm_loss": 9.1106021500309e-06, "loss_breakdown/pointer_loss": 0.3583906292915344, "step": 3560 }, { "epoch": 0.36302915195472335, "loss_breakdown/lm_loss": 9.569680514687207e-06, "loss_breakdown/pointer_loss": 0.18998616933822632, "step": 3560 }, { "epoch": 0.36302915195472335, "loss_breakdown/lm_loss": 1.599747884029057e-05, "loss_breakdown/pointer_loss": 0.21373672783374786, "step": 3560 }, { "epoch": 0.36302915195472335, "loss_breakdown/lm_loss": 2.8813767130486667e-05, "loss_breakdown/pointer_loss": 0.9020946025848389, "step": 3560 }, { "epoch": 0.36302915195472335, "loss_breakdown/lm_loss": 1.0315384315617848e-05, "loss_breakdown/pointer_loss": 0.13771677017211914, "step": 3560 }, { "epoch": 0.36302915195472335, "loss_breakdown/lm_loss": 1.4157733858155552e-05, "loss_breakdown/pointer_loss": 0.5642396807670593, "step": 3560 }, { "epoch": 0.36404889676358493, "grad_norm": 3.7239873155971597, "learning_rate": 3.533711048158641e-06, "loss": 0.2793, "step": 3570 }, { "epoch": 0.36404889676358493, "loss_breakdown/lm_loss": 1.2061683264619205e-05, "loss_breakdown/pointer_loss": 0.128476083278656, "step": 3570 }, { "epoch": 0.36404889676358493, "loss_breakdown/lm_loss": 1.327594327449333e-05, "loss_breakdown/pointer_loss": 0.20230355858802795, "step": 3570 }, { "epoch": 0.36404889676358493, "loss_breakdown/lm_loss": 1.0674129953258671e-05, "loss_breakdown/pointer_loss": 0.461495578289032, "step": 3570 }, { "epoch": 0.36404889676358493, "loss_breakdown/lm_loss": 2.306520036654547e-05, "loss_breakdown/pointer_loss": 0.7434403896331787, "step": 3570 }, { "epoch": 0.36404889676358493, "loss_breakdown/lm_loss": 1.4499940334644634e-05, "loss_breakdown/pointer_loss": 0.6817284822463989, "step": 3570 }, { "epoch": 0.36404889676358493, "loss_breakdown/lm_loss": 1.5555760910501704e-05, "loss_breakdown/pointer_loss": 0.7581794261932373, "step": 3570 }, { "epoch": 0.36404889676358493, "loss_breakdown/lm_loss": 1.4306051525636576e-05, "loss_breakdown/pointer_loss": 0.386684387922287, "step": 3570 }, { "epoch": 0.36404889676358493, "loss_breakdown/lm_loss": 1.0684910193958785e-05, "loss_breakdown/pointer_loss": 0.2758221924304962, "step": 3570 }, { "epoch": 0.3650686415724465, "grad_norm": 34.54281755174068, "learning_rate": 3.528045325779037e-06, "loss": 0.2638, "step": 3580 }, { "epoch": 0.3650686415724465, "loss_breakdown/lm_loss": 2.408714863122441e-05, "loss_breakdown/pointer_loss": 0.20911094546318054, "step": 3580 }, { "epoch": 0.3650686415724465, "loss_breakdown/lm_loss": 1.774562224454712e-05, "loss_breakdown/pointer_loss": 0.35708189010620117, "step": 3580 }, { "epoch": 0.3650686415724465, "loss_breakdown/lm_loss": 1.3303416380949784e-05, "loss_breakdown/pointer_loss": 0.211321622133255, "step": 3580 }, { "epoch": 0.3650686415724465, "loss_breakdown/lm_loss": 1.8687285773921758e-05, "loss_breakdown/pointer_loss": 0.15825550258159637, "step": 3580 }, { "epoch": 0.3650686415724465, "loss_breakdown/lm_loss": 1.5381530829472467e-05, "loss_breakdown/pointer_loss": 1.3100224733352661, "step": 3580 }, { "epoch": 0.3650686415724465, "loss_breakdown/lm_loss": 8.416040145675652e-06, "loss_breakdown/pointer_loss": 2.549700975418091, "step": 3580 }, { "epoch": 0.3650686415724465, "loss_breakdown/lm_loss": 8.427972716162913e-06, "loss_breakdown/pointer_loss": 0.5199556350708008, "step": 3580 }, { "epoch": 0.3650686415724465, "loss_breakdown/lm_loss": 9.071679414773826e-06, "loss_breakdown/pointer_loss": 0.4652681350708008, "step": 3580 }, { "epoch": 0.3660883863813081, "grad_norm": 2.955517612704471, "learning_rate": 3.522379603399434e-06, "loss": 0.3186, "step": 3590 }, { "epoch": 0.3660883863813081, "loss_breakdown/lm_loss": 1.1234282283112407e-05, "loss_breakdown/pointer_loss": 0.380779892206192, "step": 3590 }, { "epoch": 0.3660883863813081, "loss_breakdown/lm_loss": 1.6757301636971533e-05, "loss_breakdown/pointer_loss": 1.1332762241363525, "step": 3590 }, { "epoch": 0.3660883863813081, "loss_breakdown/lm_loss": 1.0210093932983e-05, "loss_breakdown/pointer_loss": 1.2619988918304443, "step": 3590 }, { "epoch": 0.3660883863813081, "loss_breakdown/lm_loss": 1.0260564522468485e-05, "loss_breakdown/pointer_loss": 0.18583129346370697, "step": 3590 }, { "epoch": 0.3660883863813081, "loss_breakdown/lm_loss": 1.1754341358027887e-05, "loss_breakdown/pointer_loss": 0.6682424545288086, "step": 3590 }, { "epoch": 0.3660883863813081, "loss_breakdown/lm_loss": 8.861093192535918e-06, "loss_breakdown/pointer_loss": 0.16631390154361725, "step": 3590 }, { "epoch": 0.3660883863813081, "loss_breakdown/lm_loss": 2.5818448193604127e-05, "loss_breakdown/pointer_loss": 0.5835686922073364, "step": 3590 }, { "epoch": 0.3660883863813081, "loss_breakdown/lm_loss": 9.010101166495588e-06, "loss_breakdown/pointer_loss": 0.21522989869117737, "step": 3590 }, { "epoch": 0.36710813119016966, "grad_norm": 13.565973597157353, "learning_rate": 3.5167138810198303e-06, "loss": 0.27, "step": 3600 }, { "epoch": 0.36710813119016966, "loss_breakdown/lm_loss": 0.00010410553659312427, "loss_breakdown/pointer_loss": 2.342880964279175, "step": 3600 }, { "epoch": 0.36710813119016966, "loss_breakdown/lm_loss": 2.5976607503253035e-05, "loss_breakdown/pointer_loss": 0.99454265832901, "step": 3600 }, { "epoch": 0.36710813119016966, "loss_breakdown/lm_loss": 2.7576479624258354e-05, "loss_breakdown/pointer_loss": 0.7731517553329468, "step": 3600 }, { "epoch": 0.36710813119016966, "loss_breakdown/lm_loss": 2.0490850147325546e-05, "loss_breakdown/pointer_loss": 0.28617358207702637, "step": 3600 }, { "epoch": 0.36710813119016966, "loss_breakdown/lm_loss": 7.360397285083309e-05, "loss_breakdown/pointer_loss": 1.1898473501205444, "step": 3600 }, { "epoch": 0.36710813119016966, "loss_breakdown/lm_loss": 1.4776423086004797e-05, "loss_breakdown/pointer_loss": 0.6646220088005066, "step": 3600 }, { "epoch": 0.36710813119016966, "loss_breakdown/lm_loss": 1.4787428881390952e-05, "loss_breakdown/pointer_loss": 0.5336620211601257, "step": 3600 }, { "epoch": 0.36710813119016966, "loss_breakdown/lm_loss": 1.526830419606995e-05, "loss_breakdown/pointer_loss": 0.3577262759208679, "step": 3600 }, { "epoch": 0.36812787599903124, "grad_norm": 8.890786987154243, "learning_rate": 3.511048158640227e-06, "loss": 0.2811, "step": 3610 }, { "epoch": 0.36812787599903124, "loss_breakdown/lm_loss": 1.2818509276257828e-05, "loss_breakdown/pointer_loss": 0.4335446357727051, "step": 3610 }, { "epoch": 0.36812787599903124, "loss_breakdown/lm_loss": 9.316664545622189e-06, "loss_breakdown/pointer_loss": 0.16347770392894745, "step": 3610 }, { "epoch": 0.36812787599903124, "loss_breakdown/lm_loss": 8.551121936761774e-06, "loss_breakdown/pointer_loss": 0.09997272491455078, "step": 3610 }, { "epoch": 0.36812787599903124, "loss_breakdown/lm_loss": 1.06587040136219e-05, "loss_breakdown/pointer_loss": 0.5187176465988159, "step": 3610 }, { "epoch": 0.36812787599903124, "loss_breakdown/lm_loss": 1.710544529487379e-05, "loss_breakdown/pointer_loss": 0.23447373509407043, "step": 3610 }, { "epoch": 0.36812787599903124, "loss_breakdown/lm_loss": 1.5564297427772544e-05, "loss_breakdown/pointer_loss": 0.22520732879638672, "step": 3610 }, { "epoch": 0.36812787599903124, "loss_breakdown/lm_loss": 1.982388312171679e-05, "loss_breakdown/pointer_loss": 0.8217357397079468, "step": 3610 }, { "epoch": 0.36812787599903124, "loss_breakdown/lm_loss": 1.801962935132906e-05, "loss_breakdown/pointer_loss": 0.2029857635498047, "step": 3610 }, { "epoch": 0.3691476208078928, "grad_norm": 6.746152603508226, "learning_rate": 3.5053824362606237e-06, "loss": 0.2949, "step": 3620 }, { "epoch": 0.3691476208078928, "loss_breakdown/lm_loss": 1.2188162145321257e-05, "loss_breakdown/pointer_loss": 0.6794160604476929, "step": 3620 }, { "epoch": 0.3691476208078928, "loss_breakdown/lm_loss": 1.1539095794432797e-05, "loss_breakdown/pointer_loss": 0.5883481502532959, "step": 3620 }, { "epoch": 0.3691476208078928, "loss_breakdown/lm_loss": 3.643596937763505e-05, "loss_breakdown/pointer_loss": 0.38299208879470825, "step": 3620 }, { "epoch": 0.3691476208078928, "loss_breakdown/lm_loss": 1.6958214473561384e-05, "loss_breakdown/pointer_loss": 0.21932902932167053, "step": 3620 }, { "epoch": 0.3691476208078928, "loss_breakdown/lm_loss": 1.2475578841986135e-05, "loss_breakdown/pointer_loss": 0.45336297154426575, "step": 3620 }, { "epoch": 0.3691476208078928, "loss_breakdown/lm_loss": 1.5768462617415935e-05, "loss_breakdown/pointer_loss": 0.3780609965324402, "step": 3620 }, { "epoch": 0.3691476208078928, "loss_breakdown/lm_loss": 1.4306219782156404e-05, "loss_breakdown/pointer_loss": 0.44935762882232666, "step": 3620 }, { "epoch": 0.3691476208078928, "loss_breakdown/lm_loss": 1.3183768714952748e-05, "loss_breakdown/pointer_loss": 0.29252976179122925, "step": 3620 }, { "epoch": 0.3701673656167544, "grad_norm": 7.4870340799577395, "learning_rate": 3.4997167138810202e-06, "loss": 0.287, "step": 3630 }, { "epoch": 0.3701673656167544, "loss_breakdown/lm_loss": 1.9243056158302352e-05, "loss_breakdown/pointer_loss": 0.1539822518825531, "step": 3630 }, { "epoch": 0.3701673656167544, "loss_breakdown/lm_loss": 1.653389335842803e-05, "loss_breakdown/pointer_loss": 0.5832309126853943, "step": 3630 }, { "epoch": 0.3701673656167544, "loss_breakdown/lm_loss": 1.17021254482097e-05, "loss_breakdown/pointer_loss": 2.765751600265503, "step": 3630 }, { "epoch": 0.3701673656167544, "loss_breakdown/lm_loss": 2.026353104156442e-05, "loss_breakdown/pointer_loss": 0.0834135040640831, "step": 3630 }, { "epoch": 0.3701673656167544, "loss_breakdown/lm_loss": 1.2949773008585908e-05, "loss_breakdown/pointer_loss": 0.21822555363178253, "step": 3630 }, { "epoch": 0.3701673656167544, "loss_breakdown/lm_loss": 2.2202922991709784e-05, "loss_breakdown/pointer_loss": 0.19065408408641815, "step": 3630 }, { "epoch": 0.3701673656167544, "loss_breakdown/lm_loss": 2.771046456473414e-05, "loss_breakdown/pointer_loss": 0.11210574209690094, "step": 3630 }, { "epoch": 0.3701673656167544, "loss_breakdown/lm_loss": 3.0323253668029793e-05, "loss_breakdown/pointer_loss": 5.927946090698242, "step": 3630 }, { "epoch": 0.371187110425616, "grad_norm": 3.102508244971203, "learning_rate": 3.4940509915014163e-06, "loss": 0.3159, "step": 3640 }, { "epoch": 0.371187110425616, "loss_breakdown/lm_loss": 1.077969773177756e-05, "loss_breakdown/pointer_loss": 0.18740017712116241, "step": 3640 }, { "epoch": 0.371187110425616, "loss_breakdown/lm_loss": 1.2216289178468287e-05, "loss_breakdown/pointer_loss": 0.3431823253631592, "step": 3640 }, { "epoch": 0.371187110425616, "loss_breakdown/lm_loss": 1.0998794095939957e-05, "loss_breakdown/pointer_loss": 1.4520307779312134, "step": 3640 }, { "epoch": 0.371187110425616, "loss_breakdown/lm_loss": 1.825774415920023e-05, "loss_breakdown/pointer_loss": 0.7929340600967407, "step": 3640 }, { "epoch": 0.371187110425616, "loss_breakdown/lm_loss": 1.2112187505408656e-05, "loss_breakdown/pointer_loss": 0.22419756650924683, "step": 3640 }, { "epoch": 0.371187110425616, "loss_breakdown/lm_loss": 2.137989577022381e-05, "loss_breakdown/pointer_loss": 0.13582590222358704, "step": 3640 }, { "epoch": 0.371187110425616, "loss_breakdown/lm_loss": 1.2336500731180422e-05, "loss_breakdown/pointer_loss": 0.15057390928268433, "step": 3640 }, { "epoch": 0.371187110425616, "loss_breakdown/lm_loss": 1.2588096979015972e-05, "loss_breakdown/pointer_loss": 0.253373920917511, "step": 3640 }, { "epoch": 0.37220685523447755, "grad_norm": 14.477137987660976, "learning_rate": 3.4883852691218132e-06, "loss": 0.314, "step": 3650 }, { "epoch": 0.37220685523447755, "loss_breakdown/lm_loss": 0.00011739241745090112, "loss_breakdown/pointer_loss": 2.3477587699890137, "step": 3650 }, { "epoch": 0.37220685523447755, "loss_breakdown/lm_loss": 6.664579268544912e-05, "loss_breakdown/pointer_loss": 0.604560136795044, "step": 3650 }, { "epoch": 0.37220685523447755, "loss_breakdown/lm_loss": 9.899454016704112e-05, "loss_breakdown/pointer_loss": 1.009623646736145, "step": 3650 }, { "epoch": 0.37220685523447755, "loss_breakdown/lm_loss": 1.9378152501303703e-05, "loss_breakdown/pointer_loss": 0.767471194267273, "step": 3650 }, { "epoch": 0.37220685523447755, "loss_breakdown/lm_loss": 2.552660407673102e-05, "loss_breakdown/pointer_loss": 1.063055157661438, "step": 3650 }, { "epoch": 0.37220685523447755, "loss_breakdown/lm_loss": 1.4759861187485512e-05, "loss_breakdown/pointer_loss": 0.4895799160003662, "step": 3650 }, { "epoch": 0.37220685523447755, "loss_breakdown/lm_loss": 2.108807530021295e-05, "loss_breakdown/pointer_loss": 0.3777230978012085, "step": 3650 }, { "epoch": 0.37220685523447755, "loss_breakdown/lm_loss": 1.9846775103360415e-05, "loss_breakdown/pointer_loss": 1.2631359100341797, "step": 3650 }, { "epoch": 0.37322660004333913, "grad_norm": 4.2809303670390895, "learning_rate": 3.4827195467422097e-06, "loss": 0.29, "step": 3660 }, { "epoch": 0.37322660004333913, "loss_breakdown/lm_loss": 8.892878213373479e-06, "loss_breakdown/pointer_loss": 0.1546163558959961, "step": 3660 }, { "epoch": 0.37322660004333913, "loss_breakdown/lm_loss": 1.1503405403345823e-05, "loss_breakdown/pointer_loss": 0.15752974152565002, "step": 3660 }, { "epoch": 0.37322660004333913, "loss_breakdown/lm_loss": 1.1163031558680814e-05, "loss_breakdown/pointer_loss": 0.5080713033676147, "step": 3660 }, { "epoch": 0.37322660004333913, "loss_breakdown/lm_loss": 1.4493495655187871e-05, "loss_breakdown/pointer_loss": 0.08862867206335068, "step": 3660 }, { "epoch": 0.37322660004333913, "loss_breakdown/lm_loss": 1.4393354831554461e-05, "loss_breakdown/pointer_loss": 0.18999610841274261, "step": 3660 }, { "epoch": 0.37322660004333913, "loss_breakdown/lm_loss": 1.2314031664573122e-05, "loss_breakdown/pointer_loss": 2.1791417598724365, "step": 3660 }, { "epoch": 0.37322660004333913, "loss_breakdown/lm_loss": 1.0462371392350178e-05, "loss_breakdown/pointer_loss": 0.1184292733669281, "step": 3660 }, { "epoch": 0.37322660004333913, "loss_breakdown/lm_loss": 4.4526397687150165e-05, "loss_breakdown/pointer_loss": 0.524968683719635, "step": 3660 }, { "epoch": 0.37424634485220076, "grad_norm": 2.7631232012542273, "learning_rate": 3.4770538243626067e-06, "loss": 0.3126, "step": 3670 }, { "epoch": 0.37424634485220076, "loss_breakdown/lm_loss": 1.3631195542984642e-05, "loss_breakdown/pointer_loss": 0.28598859906196594, "step": 3670 }, { "epoch": 0.37424634485220076, "loss_breakdown/lm_loss": 1.030062139761867e-05, "loss_breakdown/pointer_loss": 0.6268853545188904, "step": 3670 }, { "epoch": 0.37424634485220076, "loss_breakdown/lm_loss": 1.3859333193977363e-05, "loss_breakdown/pointer_loss": 0.4486963152885437, "step": 3670 }, { "epoch": 0.37424634485220076, "loss_breakdown/lm_loss": 9.526275789539795e-06, "loss_breakdown/pointer_loss": 0.3219369649887085, "step": 3670 }, { "epoch": 0.37424634485220076, "loss_breakdown/lm_loss": 1.1492261364765e-05, "loss_breakdown/pointer_loss": 0.3009157180786133, "step": 3670 }, { "epoch": 0.37424634485220076, "loss_breakdown/lm_loss": 1.1802029803220648e-05, "loss_breakdown/pointer_loss": 0.5056798458099365, "step": 3670 }, { "epoch": 0.37424634485220076, "loss_breakdown/lm_loss": 1.5528798030572943e-05, "loss_breakdown/pointer_loss": 0.16935288906097412, "step": 3670 }, { "epoch": 0.37424634485220076, "loss_breakdown/lm_loss": 1.0662778549885843e-05, "loss_breakdown/pointer_loss": 0.14418429136276245, "step": 3670 }, { "epoch": 0.37526608966106234, "grad_norm": 6.474575616362218, "learning_rate": 3.471388101983003e-06, "loss": 0.2423, "step": 3680 }, { "epoch": 0.37526608966106234, "loss_breakdown/lm_loss": 1.3435887922241818e-05, "loss_breakdown/pointer_loss": 0.18419723212718964, "step": 3680 }, { "epoch": 0.37526608966106234, "loss_breakdown/lm_loss": 1.0700802704377566e-05, "loss_breakdown/pointer_loss": 0.12662336230278015, "step": 3680 }, { "epoch": 0.37526608966106234, "loss_breakdown/lm_loss": 8.122010513034184e-06, "loss_breakdown/pointer_loss": 0.30115917325019836, "step": 3680 }, { "epoch": 0.37526608966106234, "loss_breakdown/lm_loss": 1.2536548638308886e-05, "loss_breakdown/pointer_loss": 0.1297808289527893, "step": 3680 }, { "epoch": 0.37526608966106234, "loss_breakdown/lm_loss": 1.747935493767727e-05, "loss_breakdown/pointer_loss": 0.35628554224967957, "step": 3680 }, { "epoch": 0.37526608966106234, "loss_breakdown/lm_loss": 1.702261761238333e-05, "loss_breakdown/pointer_loss": 2.9529850482940674, "step": 3680 }, { "epoch": 0.37526608966106234, "loss_breakdown/lm_loss": 1.0164398190681823e-05, "loss_breakdown/pointer_loss": 0.43325376510620117, "step": 3680 }, { "epoch": 0.37526608966106234, "loss_breakdown/lm_loss": 1.1328628716000821e-05, "loss_breakdown/pointer_loss": 0.39904212951660156, "step": 3680 }, { "epoch": 0.3762858344699239, "grad_norm": 3.0154754946120685, "learning_rate": 3.4657223796033997e-06, "loss": 0.3062, "step": 3690 }, { "epoch": 0.3762858344699239, "loss_breakdown/lm_loss": 8.994859854283277e-06, "loss_breakdown/pointer_loss": 0.1858203411102295, "step": 3690 }, { "epoch": 0.3762858344699239, "loss_breakdown/lm_loss": 1.0582202776276972e-05, "loss_breakdown/pointer_loss": 0.6901534795761108, "step": 3690 }, { "epoch": 0.3762858344699239, "loss_breakdown/lm_loss": 1.2630459423235152e-05, "loss_breakdown/pointer_loss": 0.22733114659786224, "step": 3690 }, { "epoch": 0.3762858344699239, "loss_breakdown/lm_loss": 1.107033767766552e-05, "loss_breakdown/pointer_loss": 0.7055118083953857, "step": 3690 }, { "epoch": 0.3762858344699239, "loss_breakdown/lm_loss": 1.6464071450172924e-05, "loss_breakdown/pointer_loss": 0.5138441324234009, "step": 3690 }, { "epoch": 0.3762858344699239, "loss_breakdown/lm_loss": 8.983545740193222e-06, "loss_breakdown/pointer_loss": 0.5117863416671753, "step": 3690 }, { "epoch": 0.3762858344699239, "loss_breakdown/lm_loss": 8.231289029936306e-06, "loss_breakdown/pointer_loss": 0.23273059725761414, "step": 3690 }, { "epoch": 0.3762858344699239, "loss_breakdown/lm_loss": 1.0290415957570076e-05, "loss_breakdown/pointer_loss": 0.9915072321891785, "step": 3690 }, { "epoch": 0.3773055792787855, "grad_norm": 7.765140237721791, "learning_rate": 3.4600566572237966e-06, "loss": 0.3107, "step": 3700 }, { "epoch": 0.3773055792787855, "loss_breakdown/lm_loss": 4.426818122738041e-05, "loss_breakdown/pointer_loss": 2.670332908630371, "step": 3700 }, { "epoch": 0.3773055792787855, "loss_breakdown/lm_loss": 3.306900180177763e-05, "loss_breakdown/pointer_loss": 0.9522002935409546, "step": 3700 }, { "epoch": 0.3773055792787855, "loss_breakdown/lm_loss": 1.5013775737315882e-05, "loss_breakdown/pointer_loss": 0.7709569931030273, "step": 3700 }, { "epoch": 0.3773055792787855, "loss_breakdown/lm_loss": 1.667437936703209e-05, "loss_breakdown/pointer_loss": 0.5409241318702698, "step": 3700 }, { "epoch": 0.3773055792787855, "loss_breakdown/lm_loss": 2.1769168597529642e-05, "loss_breakdown/pointer_loss": 0.40116286277770996, "step": 3700 }, { "epoch": 0.3773055792787855, "loss_breakdown/lm_loss": 1.6543821402592584e-05, "loss_breakdown/pointer_loss": 1.0625100135803223, "step": 3700 }, { "epoch": 0.3773055792787855, "loss_breakdown/lm_loss": 1.3827472685079556e-05, "loss_breakdown/pointer_loss": 1.0544651746749878, "step": 3700 }, { "epoch": 0.3773055792787855, "loss_breakdown/lm_loss": 1.8444192392053083e-05, "loss_breakdown/pointer_loss": 0.2799845039844513, "step": 3700 }, { "epoch": 0.3783253240876471, "grad_norm": 6.645969184335113, "learning_rate": 3.4543909348441927e-06, "loss": 0.2863, "step": 3710 }, { "epoch": 0.3783253240876471, "loss_breakdown/lm_loss": 2.1957141143502668e-05, "loss_breakdown/pointer_loss": 0.35502690076828003, "step": 3710 }, { "epoch": 0.3783253240876471, "loss_breakdown/lm_loss": 8.037254701775964e-06, "loss_breakdown/pointer_loss": 0.06739333271980286, "step": 3710 }, { "epoch": 0.3783253240876471, "loss_breakdown/lm_loss": 2.927195055235643e-05, "loss_breakdown/pointer_loss": 0.45915576815605164, "step": 3710 }, { "epoch": 0.3783253240876471, "loss_breakdown/lm_loss": 1.287432405661093e-05, "loss_breakdown/pointer_loss": 0.7039586305618286, "step": 3710 }, { "epoch": 0.3783253240876471, "loss_breakdown/lm_loss": 1.8372789782006294e-05, "loss_breakdown/pointer_loss": 2.152003765106201, "step": 3710 }, { "epoch": 0.3783253240876471, "loss_breakdown/lm_loss": 9.560355465509929e-06, "loss_breakdown/pointer_loss": 0.07741076499223709, "step": 3710 }, { "epoch": 0.3783253240876471, "loss_breakdown/lm_loss": 1.0569662663328927e-05, "loss_breakdown/pointer_loss": 0.15508829057216644, "step": 3710 }, { "epoch": 0.3783253240876471, "loss_breakdown/lm_loss": 5.2558381867129356e-05, "loss_breakdown/pointer_loss": 0.997600793838501, "step": 3710 }, { "epoch": 0.37934506889650865, "grad_norm": 3.016098453091843, "learning_rate": 3.448725212464589e-06, "loss": 0.3151, "step": 3720 }, { "epoch": 0.37934506889650865, "loss_breakdown/lm_loss": 1.2171560229035094e-05, "loss_breakdown/pointer_loss": 0.316861093044281, "step": 3720 }, { "epoch": 0.37934506889650865, "loss_breakdown/lm_loss": 1.142606015491765e-05, "loss_breakdown/pointer_loss": 0.25091761350631714, "step": 3720 }, { "epoch": 0.37934506889650865, "loss_breakdown/lm_loss": 1.1951702617807314e-05, "loss_breakdown/pointer_loss": 0.23402445018291473, "step": 3720 }, { "epoch": 0.37934506889650865, "loss_breakdown/lm_loss": 1.0224352990917396e-05, "loss_breakdown/pointer_loss": 0.2865082919597626, "step": 3720 }, { "epoch": 0.37934506889650865, "loss_breakdown/lm_loss": 1.6211010006372817e-05, "loss_breakdown/pointer_loss": 1.200056552886963, "step": 3720 }, { "epoch": 0.37934506889650865, "loss_breakdown/lm_loss": 1.2049536053382326e-05, "loss_breakdown/pointer_loss": 0.2771962285041809, "step": 3720 }, { "epoch": 0.37934506889650865, "loss_breakdown/lm_loss": 9.957734619092662e-06, "loss_breakdown/pointer_loss": 0.3040909171104431, "step": 3720 }, { "epoch": 0.37934506889650865, "loss_breakdown/lm_loss": 1.2070974662492517e-05, "loss_breakdown/pointer_loss": 0.39588016271591187, "step": 3720 }, { "epoch": 0.38036481370537023, "grad_norm": 10.101730191542453, "learning_rate": 3.443059490084986e-06, "loss": 0.2807, "step": 3730 }, { "epoch": 0.38036481370537023, "loss_breakdown/lm_loss": 1.1145827556902077e-05, "loss_breakdown/pointer_loss": 0.639797031879425, "step": 3730 }, { "epoch": 0.38036481370537023, "loss_breakdown/lm_loss": 9.81074663286563e-06, "loss_breakdown/pointer_loss": 0.38495945930480957, "step": 3730 }, { "epoch": 0.38036481370537023, "loss_breakdown/lm_loss": 1.0812068467203062e-05, "loss_breakdown/pointer_loss": 2.1714844703674316, "step": 3730 }, { "epoch": 0.38036481370537023, "loss_breakdown/lm_loss": 5.4799245845060796e-05, "loss_breakdown/pointer_loss": 3.2233898639678955, "step": 3730 }, { "epoch": 0.38036481370537023, "loss_breakdown/lm_loss": 1.46063684951514e-05, "loss_breakdown/pointer_loss": 0.21760880947113037, "step": 3730 }, { "epoch": 0.38036481370537023, "loss_breakdown/lm_loss": 1.154716210294282e-05, "loss_breakdown/pointer_loss": 0.6522901058197021, "step": 3730 }, { "epoch": 0.38036481370537023, "loss_breakdown/lm_loss": 2.5256691515096463e-05, "loss_breakdown/pointer_loss": 0.12334215641021729, "step": 3730 }, { "epoch": 0.38036481370537023, "loss_breakdown/lm_loss": 5.6179116654675454e-05, "loss_breakdown/pointer_loss": 0.33068275451660156, "step": 3730 }, { "epoch": 0.3813845585142318, "grad_norm": 4.352382095184364, "learning_rate": 3.4373937677053826e-06, "loss": 0.3042, "step": 3740 }, { "epoch": 0.3813845585142318, "loss_breakdown/lm_loss": 1.6370304365409538e-05, "loss_breakdown/pointer_loss": 0.09890684485435486, "step": 3740 }, { "epoch": 0.3813845585142318, "loss_breakdown/lm_loss": 1.3697479516849853e-05, "loss_breakdown/pointer_loss": 0.32677173614501953, "step": 3740 }, { "epoch": 0.3813845585142318, "loss_breakdown/lm_loss": 1.04363225545967e-05, "loss_breakdown/pointer_loss": 0.27433574199676514, "step": 3740 }, { "epoch": 0.3813845585142318, "loss_breakdown/lm_loss": 1.0480805940460414e-05, "loss_breakdown/pointer_loss": 1.108008861541748, "step": 3740 }, { "epoch": 0.3813845585142318, "loss_breakdown/lm_loss": 1.9560613509383984e-05, "loss_breakdown/pointer_loss": 1.272881269454956, "step": 3740 }, { "epoch": 0.3813845585142318, "loss_breakdown/lm_loss": 7.426636329910252e-06, "loss_breakdown/pointer_loss": 0.20117610692977905, "step": 3740 }, { "epoch": 0.3813845585142318, "loss_breakdown/lm_loss": 8.460213393846061e-06, "loss_breakdown/pointer_loss": 0.16284950077533722, "step": 3740 }, { "epoch": 0.3813845585142318, "loss_breakdown/lm_loss": 1.0928040865110233e-05, "loss_breakdown/pointer_loss": 0.1848675012588501, "step": 3740 }, { "epoch": 0.3824043033230934, "grad_norm": 17.511525338683875, "learning_rate": 3.4317280453257796e-06, "loss": 0.3189, "step": 3750 }, { "epoch": 0.3824043033230934, "loss_breakdown/lm_loss": 3.132631172775291e-05, "loss_breakdown/pointer_loss": 1.2719848155975342, "step": 3750 }, { "epoch": 0.3824043033230934, "loss_breakdown/lm_loss": 2.4963108444353566e-05, "loss_breakdown/pointer_loss": 0.38097962737083435, "step": 3750 }, { "epoch": 0.3824043033230934, "loss_breakdown/lm_loss": 2.2042480850359425e-05, "loss_breakdown/pointer_loss": 0.37266024947166443, "step": 3750 }, { "epoch": 0.3824043033230934, "loss_breakdown/lm_loss": 2.0352403225842863e-05, "loss_breakdown/pointer_loss": 0.7426993250846863, "step": 3750 }, { "epoch": 0.3824043033230934, "loss_breakdown/lm_loss": 2.699592732824385e-05, "loss_breakdown/pointer_loss": 1.4409631490707397, "step": 3750 }, { "epoch": 0.3824043033230934, "loss_breakdown/lm_loss": 1.2405100278556347e-05, "loss_breakdown/pointer_loss": 0.3067834973335266, "step": 3750 }, { "epoch": 0.3824043033230934, "loss_breakdown/lm_loss": 1.2356463230389636e-05, "loss_breakdown/pointer_loss": 0.6992024779319763, "step": 3750 }, { "epoch": 0.3824043033230934, "loss_breakdown/lm_loss": 1.1935644579352811e-05, "loss_breakdown/pointer_loss": 0.6435070037841797, "step": 3750 }, { "epoch": 0.38342404813195496, "grad_norm": 2.6006066184781473, "learning_rate": 3.426062322946176e-06, "loss": 0.294, "step": 3760 }, { "epoch": 0.38342404813195496, "loss_breakdown/lm_loss": 7.401476523227757e-06, "loss_breakdown/pointer_loss": 0.14255796372890472, "step": 3760 }, { "epoch": 0.38342404813195496, "loss_breakdown/lm_loss": 4.401650949148461e-05, "loss_breakdown/pointer_loss": 0.381325364112854, "step": 3760 }, { "epoch": 0.38342404813195496, "loss_breakdown/lm_loss": 8.818681635602843e-06, "loss_breakdown/pointer_loss": 0.17961004376411438, "step": 3760 }, { "epoch": 0.38342404813195496, "loss_breakdown/lm_loss": 1.1799883395724464e-05, "loss_breakdown/pointer_loss": 0.23438887298107147, "step": 3760 }, { "epoch": 0.38342404813195496, "loss_breakdown/lm_loss": 8.620011612947565e-06, "loss_breakdown/pointer_loss": 0.38168686628341675, "step": 3760 }, { "epoch": 0.38342404813195496, "loss_breakdown/lm_loss": 1.1018707482435275e-05, "loss_breakdown/pointer_loss": 1.8401679992675781, "step": 3760 }, { "epoch": 0.38342404813195496, "loss_breakdown/lm_loss": 1.1503435416670982e-05, "loss_breakdown/pointer_loss": 2.695739984512329, "step": 3760 }, { "epoch": 0.38342404813195496, "loss_breakdown/lm_loss": 9.286215572501533e-06, "loss_breakdown/pointer_loss": 0.9805634021759033, "step": 3760 }, { "epoch": 0.38444379294081654, "grad_norm": 2.84141938663466, "learning_rate": 3.420396600566572e-06, "loss": 0.299, "step": 3770 }, { "epoch": 0.38444379294081654, "loss_breakdown/lm_loss": 1.6455360309919342e-05, "loss_breakdown/pointer_loss": 0.3634766936302185, "step": 3770 }, { "epoch": 0.38444379294081654, "loss_breakdown/lm_loss": 8.5590863818652e-06, "loss_breakdown/pointer_loss": 0.23938745260238647, "step": 3770 }, { "epoch": 0.38444379294081654, "loss_breakdown/lm_loss": 9.690839760878589e-06, "loss_breakdown/pointer_loss": 0.33926382660865784, "step": 3770 }, { "epoch": 0.38444379294081654, "loss_breakdown/lm_loss": 9.050613698491361e-06, "loss_breakdown/pointer_loss": 0.41250330209732056, "step": 3770 }, { "epoch": 0.38444379294081654, "loss_breakdown/lm_loss": 1.147822604252724e-05, "loss_breakdown/pointer_loss": 0.2045070230960846, "step": 3770 }, { "epoch": 0.38444379294081654, "loss_breakdown/lm_loss": 1.0087451300933026e-05, "loss_breakdown/pointer_loss": 0.6366403102874756, "step": 3770 }, { "epoch": 0.38444379294081654, "loss_breakdown/lm_loss": 9.19633293960942e-06, "loss_breakdown/pointer_loss": 0.5631392002105713, "step": 3770 }, { "epoch": 0.38444379294081654, "loss_breakdown/lm_loss": 1.0898165783146396e-05, "loss_breakdown/pointer_loss": 0.07513295114040375, "step": 3770 }, { "epoch": 0.3854635377496781, "grad_norm": 9.571565035004053, "learning_rate": 3.414730878186969e-06, "loss": 0.2812, "step": 3780 }, { "epoch": 0.3854635377496781, "loss_breakdown/lm_loss": 9.417387445864733e-06, "loss_breakdown/pointer_loss": 0.5771225690841675, "step": 3780 }, { "epoch": 0.3854635377496781, "loss_breakdown/lm_loss": 9.552490155328996e-06, "loss_breakdown/pointer_loss": 0.3290651738643646, "step": 3780 }, { "epoch": 0.3854635377496781, "loss_breakdown/lm_loss": 9.874313036561944e-06, "loss_breakdown/pointer_loss": 0.14637741446495056, "step": 3780 }, { "epoch": 0.3854635377496781, "loss_breakdown/lm_loss": 1.1610588444455061e-05, "loss_breakdown/pointer_loss": 0.2802533507347107, "step": 3780 }, { "epoch": 0.3854635377496781, "loss_breakdown/lm_loss": 1.0708793524827342e-05, "loss_breakdown/pointer_loss": 1.6608428955078125, "step": 3780 }, { "epoch": 0.3854635377496781, "loss_breakdown/lm_loss": 1.1804044333985075e-05, "loss_breakdown/pointer_loss": 0.411158949136734, "step": 3780 }, { "epoch": 0.3854635377496781, "loss_breakdown/lm_loss": 9.027969099406619e-06, "loss_breakdown/pointer_loss": 0.3810828626155853, "step": 3780 }, { "epoch": 0.3854635377496781, "loss_breakdown/lm_loss": 9.409426638740115e-06, "loss_breakdown/pointer_loss": 1.7933539152145386, "step": 3780 }, { "epoch": 0.3864832825585397, "grad_norm": 2.406923656847711, "learning_rate": 3.4090651558073656e-06, "loss": 0.2941, "step": 3790 }, { "epoch": 0.3864832825585397, "loss_breakdown/lm_loss": 7.625327725691022e-06, "loss_breakdown/pointer_loss": 0.5059897899627686, "step": 3790 }, { "epoch": 0.3864832825585397, "loss_breakdown/lm_loss": 9.781482731341384e-06, "loss_breakdown/pointer_loss": 0.3154756426811218, "step": 3790 }, { "epoch": 0.3864832825585397, "loss_breakdown/lm_loss": 8.437895303359255e-06, "loss_breakdown/pointer_loss": 0.10923460125923157, "step": 3790 }, { "epoch": 0.3864832825585397, "loss_breakdown/lm_loss": 9.935760317603126e-06, "loss_breakdown/pointer_loss": 0.4386186897754669, "step": 3790 }, { "epoch": 0.3864832825585397, "loss_breakdown/lm_loss": 1.126954703067895e-05, "loss_breakdown/pointer_loss": 1.132537841796875, "step": 3790 }, { "epoch": 0.3864832825585397, "loss_breakdown/lm_loss": 7.755458682368044e-06, "loss_breakdown/pointer_loss": 0.12980350852012634, "step": 3790 }, { "epoch": 0.3864832825585397, "loss_breakdown/lm_loss": 7.929691491881385e-06, "loss_breakdown/pointer_loss": 0.5128101110458374, "step": 3790 }, { "epoch": 0.3864832825585397, "loss_breakdown/lm_loss": 8.159988283296116e-06, "loss_breakdown/pointer_loss": 0.8640230894088745, "step": 3790 }, { "epoch": 0.38750302736740133, "grad_norm": 16.50707908056119, "learning_rate": 3.4033994334277625e-06, "loss": 0.2856, "step": 3800 }, { "epoch": 0.38750302736740133, "loss_breakdown/lm_loss": 2.621577186801005e-05, "loss_breakdown/pointer_loss": 1.6178538799285889, "step": 3800 }, { "epoch": 0.38750302736740133, "loss_breakdown/lm_loss": 1.4161262697598431e-05, "loss_breakdown/pointer_loss": 0.8158493638038635, "step": 3800 }, { "epoch": 0.38750302736740133, "loss_breakdown/lm_loss": 1.4232123248802964e-05, "loss_breakdown/pointer_loss": 0.5520256757736206, "step": 3800 }, { "epoch": 0.38750302736740133, "loss_breakdown/lm_loss": 1.277444425795693e-05, "loss_breakdown/pointer_loss": 0.5582723617553711, "step": 3800 }, { "epoch": 0.38750302736740133, "loss_breakdown/lm_loss": 1.586594407854136e-05, "loss_breakdown/pointer_loss": 1.698267936706543, "step": 3800 }, { "epoch": 0.38750302736740133, "loss_breakdown/lm_loss": 1.5423527656821534e-05, "loss_breakdown/pointer_loss": 0.7213665246963501, "step": 3800 }, { "epoch": 0.38750302736740133, "loss_breakdown/lm_loss": 9.953499102266505e-06, "loss_breakdown/pointer_loss": 0.22442740201950073, "step": 3800 }, { "epoch": 0.38750302736740133, "loss_breakdown/lm_loss": 1.1771981917263474e-05, "loss_breakdown/pointer_loss": 0.4753858745098114, "step": 3800 }, { "epoch": 0.3885227721762629, "grad_norm": 2.6763431688239696, "learning_rate": 3.397733711048159e-06, "loss": 0.2832, "step": 3810 }, { "epoch": 0.3885227721762629, "loss_breakdown/lm_loss": 9.8597220130614e-06, "loss_breakdown/pointer_loss": 0.12497781217098236, "step": 3810 }, { "epoch": 0.3885227721762629, "loss_breakdown/lm_loss": 8.834832442516927e-06, "loss_breakdown/pointer_loss": 0.24423964321613312, "step": 3810 }, { "epoch": 0.3885227721762629, "loss_breakdown/lm_loss": 1.1998106856481172e-05, "loss_breakdown/pointer_loss": 0.12418365478515625, "step": 3810 }, { "epoch": 0.3885227721762629, "loss_breakdown/lm_loss": 1.237761080119526e-05, "loss_breakdown/pointer_loss": 0.2623836398124695, "step": 3810 }, { "epoch": 0.3885227721762629, "loss_breakdown/lm_loss": 9.357783710584044e-06, "loss_breakdown/pointer_loss": 0.14911997318267822, "step": 3810 }, { "epoch": 0.3885227721762629, "loss_breakdown/lm_loss": 1.7662920072325505e-05, "loss_breakdown/pointer_loss": 1.688100814819336, "step": 3810 }, { "epoch": 0.3885227721762629, "loss_breakdown/lm_loss": 1.1062410521844868e-05, "loss_breakdown/pointer_loss": 1.9293686151504517, "step": 3810 }, { "epoch": 0.3885227721762629, "loss_breakdown/lm_loss": 7.790886229486205e-06, "loss_breakdown/pointer_loss": 0.08264296501874924, "step": 3810 }, { "epoch": 0.3895425169851245, "grad_norm": 2.558852528569202, "learning_rate": 3.3920679886685555e-06, "loss": 0.3177, "step": 3820 }, { "epoch": 0.3895425169851245, "loss_breakdown/lm_loss": 8.533422260370571e-06, "loss_breakdown/pointer_loss": 0.8369719982147217, "step": 3820 }, { "epoch": 0.3895425169851245, "loss_breakdown/lm_loss": 9.425312782695983e-06, "loss_breakdown/pointer_loss": 0.668124794960022, "step": 3820 }, { "epoch": 0.3895425169851245, "loss_breakdown/lm_loss": 8.098166290437803e-06, "loss_breakdown/pointer_loss": 0.3579252362251282, "step": 3820 }, { "epoch": 0.3895425169851245, "loss_breakdown/lm_loss": 9.935496564139612e-06, "loss_breakdown/pointer_loss": 0.3501160144805908, "step": 3820 }, { "epoch": 0.3895425169851245, "loss_breakdown/lm_loss": 9.374227374792099e-06, "loss_breakdown/pointer_loss": 0.5237208604812622, "step": 3820 }, { "epoch": 0.3895425169851245, "loss_breakdown/lm_loss": 7.171290235419292e-06, "loss_breakdown/pointer_loss": 0.19475172460079193, "step": 3820 }, { "epoch": 0.3895425169851245, "loss_breakdown/lm_loss": 9.263722859031986e-06, "loss_breakdown/pointer_loss": 0.7088897228240967, "step": 3820 }, { "epoch": 0.3895425169851245, "loss_breakdown/lm_loss": 7.63680236559594e-06, "loss_breakdown/pointer_loss": 0.23170733451843262, "step": 3820 }, { "epoch": 0.39056226179398607, "grad_norm": 9.075106300859364, "learning_rate": 3.3864022662889524e-06, "loss": 0.2809, "step": 3830 }, { "epoch": 0.39056226179398607, "loss_breakdown/lm_loss": 9.155112820735667e-06, "loss_breakdown/pointer_loss": 0.6869224309921265, "step": 3830 }, { "epoch": 0.39056226179398607, "loss_breakdown/lm_loss": 1.147561488323845e-05, "loss_breakdown/pointer_loss": 0.11936606466770172, "step": 3830 }, { "epoch": 0.39056226179398607, "loss_breakdown/lm_loss": 1.9775330656557344e-05, "loss_breakdown/pointer_loss": 2.941025733947754, "step": 3830 }, { "epoch": 0.39056226179398607, "loss_breakdown/lm_loss": 1.147955481428653e-05, "loss_breakdown/pointer_loss": 0.14231224358081818, "step": 3830 }, { "epoch": 0.39056226179398607, "loss_breakdown/lm_loss": 1.2608097677002661e-05, "loss_breakdown/pointer_loss": 0.09805271029472351, "step": 3830 }, { "epoch": 0.39056226179398607, "loss_breakdown/lm_loss": 1.3839665371051524e-05, "loss_breakdown/pointer_loss": 0.13735532760620117, "step": 3830 }, { "epoch": 0.39056226179398607, "loss_breakdown/lm_loss": 1.6180018064915203e-05, "loss_breakdown/pointer_loss": 0.37266767024993896, "step": 3830 }, { "epoch": 0.39056226179398607, "loss_breakdown/lm_loss": 1.154714664153289e-05, "loss_breakdown/pointer_loss": 0.2711793780326843, "step": 3830 }, { "epoch": 0.39158200660284764, "grad_norm": 1.7520477461702426, "learning_rate": 3.3807365439093485e-06, "loss": 0.3091, "step": 3840 }, { "epoch": 0.39158200660284764, "loss_breakdown/lm_loss": 7.97578559286194e-06, "loss_breakdown/pointer_loss": 0.7686638832092285, "step": 3840 }, { "epoch": 0.39158200660284764, "loss_breakdown/lm_loss": 1.0404894965176936e-05, "loss_breakdown/pointer_loss": 0.6890468597412109, "step": 3840 }, { "epoch": 0.39158200660284764, "loss_breakdown/lm_loss": 7.818545782356523e-06, "loss_breakdown/pointer_loss": 0.5872025489807129, "step": 3840 }, { "epoch": 0.39158200660284764, "loss_breakdown/lm_loss": 8.13426413515117e-06, "loss_breakdown/pointer_loss": 1.5233120918273926, "step": 3840 }, { "epoch": 0.39158200660284764, "loss_breakdown/lm_loss": 7.11432176103699e-06, "loss_breakdown/pointer_loss": 0.284498393535614, "step": 3840 }, { "epoch": 0.39158200660284764, "loss_breakdown/lm_loss": 7.799485501891468e-06, "loss_breakdown/pointer_loss": 0.4810340404510498, "step": 3840 }, { "epoch": 0.39158200660284764, "loss_breakdown/lm_loss": 1.0144506632059347e-05, "loss_breakdown/pointer_loss": 0.2011488378047943, "step": 3840 }, { "epoch": 0.39158200660284764, "loss_breakdown/lm_loss": 9.135252184933051e-06, "loss_breakdown/pointer_loss": 0.26655063033103943, "step": 3840 }, { "epoch": 0.3926017514117092, "grad_norm": 8.87377834221863, "learning_rate": 3.375070821529745e-06, "loss": 0.291, "step": 3850 }, { "epoch": 0.3926017514117092, "loss_breakdown/lm_loss": 0.00013106691767461598, "loss_breakdown/pointer_loss": 1.3065118789672852, "step": 3850 }, { "epoch": 0.3926017514117092, "loss_breakdown/lm_loss": 1.9292709112050943e-05, "loss_breakdown/pointer_loss": 0.7189258337020874, "step": 3850 }, { "epoch": 0.3926017514117092, "loss_breakdown/lm_loss": 2.292858698638156e-05, "loss_breakdown/pointer_loss": 1.0739490985870361, "step": 3850 }, { "epoch": 0.3926017514117092, "loss_breakdown/lm_loss": 1.486126257077558e-05, "loss_breakdown/pointer_loss": 1.053786277770996, "step": 3850 }, { "epoch": 0.3926017514117092, "loss_breakdown/lm_loss": 1.5430710845976137e-05, "loss_breakdown/pointer_loss": 0.3942907750606537, "step": 3850 }, { "epoch": 0.3926017514117092, "loss_breakdown/lm_loss": 1.198180325445719e-05, "loss_breakdown/pointer_loss": 0.6851856112480164, "step": 3850 }, { "epoch": 0.3926017514117092, "loss_breakdown/lm_loss": 1.3107614904583897e-05, "loss_breakdown/pointer_loss": 0.3972119688987732, "step": 3850 }, { "epoch": 0.3926017514117092, "loss_breakdown/lm_loss": 1.2468525710573886e-05, "loss_breakdown/pointer_loss": 1.6376404762268066, "step": 3850 }, { "epoch": 0.3936214962205708, "grad_norm": 14.818290321942017, "learning_rate": 3.369405099150142e-06, "loss": 0.2718, "step": 3860 }, { "epoch": 0.3936214962205708, "loss_breakdown/lm_loss": 1.2091486496501602e-05, "loss_breakdown/pointer_loss": 0.3988710939884186, "step": 3860 }, { "epoch": 0.3936214962205708, "loss_breakdown/lm_loss": 1.0272424333379604e-05, "loss_breakdown/pointer_loss": 0.2953897714614868, "step": 3860 }, { "epoch": 0.3936214962205708, "loss_breakdown/lm_loss": 2.5854458726826124e-05, "loss_breakdown/pointer_loss": 0.601991593837738, "step": 3860 }, { "epoch": 0.3936214962205708, "loss_breakdown/lm_loss": 1.0873625797103159e-05, "loss_breakdown/pointer_loss": 0.45088276267051697, "step": 3860 }, { "epoch": 0.3936214962205708, "loss_breakdown/lm_loss": 1.2786901606887113e-05, "loss_breakdown/pointer_loss": 0.31960394978523254, "step": 3860 }, { "epoch": 0.3936214962205708, "loss_breakdown/lm_loss": 7.718689630564768e-06, "loss_breakdown/pointer_loss": 0.30190449953079224, "step": 3860 }, { "epoch": 0.3936214962205708, "loss_breakdown/lm_loss": 8.883350346877705e-06, "loss_breakdown/pointer_loss": 0.9599730968475342, "step": 3860 }, { "epoch": 0.3936214962205708, "loss_breakdown/lm_loss": 9.785581823962275e-06, "loss_breakdown/pointer_loss": 0.18024250864982605, "step": 3860 }, { "epoch": 0.3946412410294324, "grad_norm": 2.087054075619371, "learning_rate": 3.3637393767705384e-06, "loss": 0.3044, "step": 3870 }, { "epoch": 0.3946412410294324, "loss_breakdown/lm_loss": 1.1680439456540626e-05, "loss_breakdown/pointer_loss": 0.30063939094543457, "step": 3870 }, { "epoch": 0.3946412410294324, "loss_breakdown/lm_loss": 9.705416232463904e-06, "loss_breakdown/pointer_loss": 0.3869773745536804, "step": 3870 }, { "epoch": 0.3946412410294324, "loss_breakdown/lm_loss": 9.398123438586481e-06, "loss_breakdown/pointer_loss": 0.915336012840271, "step": 3870 }, { "epoch": 0.3946412410294324, "loss_breakdown/lm_loss": 9.4503557193093e-06, "loss_breakdown/pointer_loss": 0.38546985387802124, "step": 3870 }, { "epoch": 0.3946412410294324, "loss_breakdown/lm_loss": 1.0938156265183352e-05, "loss_breakdown/pointer_loss": 0.15504565834999084, "step": 3870 }, { "epoch": 0.3946412410294324, "loss_breakdown/lm_loss": 9.60906982072629e-06, "loss_breakdown/pointer_loss": 0.6261431574821472, "step": 3870 }, { "epoch": 0.3946412410294324, "loss_breakdown/lm_loss": 8.993772098619957e-06, "loss_breakdown/pointer_loss": 0.4857730567455292, "step": 3870 }, { "epoch": 0.3946412410294324, "loss_breakdown/lm_loss": 9.214653800881933e-06, "loss_breakdown/pointer_loss": 0.3490488827228546, "step": 3870 }, { "epoch": 0.39566098583829395, "grad_norm": 32.720834192382675, "learning_rate": 3.3580736543909354e-06, "loss": 0.2736, "step": 3880 }, { "epoch": 0.39566098583829395, "loss_breakdown/lm_loss": 8.805459401628468e-06, "loss_breakdown/pointer_loss": 2.512017250061035, "step": 3880 }, { "epoch": 0.39566098583829395, "loss_breakdown/lm_loss": 1.0704768101277296e-05, "loss_breakdown/pointer_loss": 0.11077041178941727, "step": 3880 }, { "epoch": 0.39566098583829395, "loss_breakdown/lm_loss": 9.337886694993358e-06, "loss_breakdown/pointer_loss": 0.13006357848644257, "step": 3880 }, { "epoch": 0.39566098583829395, "loss_breakdown/lm_loss": 9.075651178136468e-06, "loss_breakdown/pointer_loss": 0.19914087653160095, "step": 3880 }, { "epoch": 0.39566098583829395, "loss_breakdown/lm_loss": 1.2957682884007227e-05, "loss_breakdown/pointer_loss": 0.18506157398223877, "step": 3880 }, { "epoch": 0.39566098583829395, "loss_breakdown/lm_loss": 1.298535516980337e-05, "loss_breakdown/pointer_loss": 0.07716892659664154, "step": 3880 }, { "epoch": 0.39566098583829395, "loss_breakdown/lm_loss": 1.2758950106217526e-05, "loss_breakdown/pointer_loss": 0.15189766883850098, "step": 3880 }, { "epoch": 0.39566098583829395, "loss_breakdown/lm_loss": 1.2298104593355674e-05, "loss_breakdown/pointer_loss": 0.12047763168811798, "step": 3880 }, { "epoch": 0.39668073064715553, "grad_norm": 2.3771918221280317, "learning_rate": 3.352407932011332e-06, "loss": 0.3036, "step": 3890 }, { "epoch": 0.39668073064715553, "loss_breakdown/lm_loss": 9.769351891009137e-06, "loss_breakdown/pointer_loss": 0.2095353752374649, "step": 3890 }, { "epoch": 0.39668073064715553, "loss_breakdown/lm_loss": 1.6418267478002235e-05, "loss_breakdown/pointer_loss": 1.7282651662826538, "step": 3890 }, { "epoch": 0.39668073064715553, "loss_breakdown/lm_loss": 7.478288807760691e-06, "loss_breakdown/pointer_loss": 1.4686107635498047, "step": 3890 }, { "epoch": 0.39668073064715553, "loss_breakdown/lm_loss": 1.0081716027343646e-05, "loss_breakdown/pointer_loss": 0.16152864694595337, "step": 3890 }, { "epoch": 0.39668073064715553, "loss_breakdown/lm_loss": 1.0005393960454967e-05, "loss_breakdown/pointer_loss": 0.37231943011283875, "step": 3890 }, { "epoch": 0.39668073064715553, "loss_breakdown/lm_loss": 1.1340556739014573e-05, "loss_breakdown/pointer_loss": 0.6437031626701355, "step": 3890 }, { "epoch": 0.39668073064715553, "loss_breakdown/lm_loss": 7.443855793098919e-06, "loss_breakdown/pointer_loss": 0.1548387110233307, "step": 3890 }, { "epoch": 0.39668073064715553, "loss_breakdown/lm_loss": 8.544525371689815e-06, "loss_breakdown/pointer_loss": 0.5933196544647217, "step": 3890 }, { "epoch": 0.3977004754560171, "grad_norm": 13.358525705432196, "learning_rate": 3.346742209631728e-06, "loss": 0.2667, "step": 3900 }, { "epoch": 0.3977004754560171, "loss_breakdown/lm_loss": 6.19644924881868e-05, "loss_breakdown/pointer_loss": 3.5732004642486572, "step": 3900 }, { "epoch": 0.3977004754560171, "loss_breakdown/lm_loss": 2.1905481844441965e-05, "loss_breakdown/pointer_loss": 1.576566219329834, "step": 3900 }, { "epoch": 0.3977004754560171, "loss_breakdown/lm_loss": 1.6831876564538106e-05, "loss_breakdown/pointer_loss": 1.791520357131958, "step": 3900 }, { "epoch": 0.3977004754560171, "loss_breakdown/lm_loss": 2.0053557818755507e-05, "loss_breakdown/pointer_loss": 0.9186811447143555, "step": 3900 }, { "epoch": 0.3977004754560171, "loss_breakdown/lm_loss": 1.1041254765586928e-05, "loss_breakdown/pointer_loss": 0.8353911638259888, "step": 3900 }, { "epoch": 0.3977004754560171, "loss_breakdown/lm_loss": 1.202303155878326e-05, "loss_breakdown/pointer_loss": 1.1780695915222168, "step": 3900 }, { "epoch": 0.3977004754560171, "loss_breakdown/lm_loss": 1.027943471854087e-05, "loss_breakdown/pointer_loss": 0.2771396338939667, "step": 3900 }, { "epoch": 0.3977004754560171, "loss_breakdown/lm_loss": 1.159115072368877e-05, "loss_breakdown/pointer_loss": 0.5318225622177124, "step": 3900 }, { "epoch": 0.3987202202648787, "grad_norm": 2.214319822490425, "learning_rate": 3.341076487252125e-06, "loss": 0.2685, "step": 3910 }, { "epoch": 0.3987202202648787, "loss_breakdown/lm_loss": 1.0625312825141009e-05, "loss_breakdown/pointer_loss": 0.08709730207920074, "step": 3910 }, { "epoch": 0.3987202202648787, "loss_breakdown/lm_loss": 1.0122701496584341e-05, "loss_breakdown/pointer_loss": 0.26779288053512573, "step": 3910 }, { "epoch": 0.3987202202648787, "loss_breakdown/lm_loss": 8.372330739803147e-06, "loss_breakdown/pointer_loss": 0.3381183445453644, "step": 3910 }, { "epoch": 0.3987202202648787, "loss_breakdown/lm_loss": 3.320203541079536e-05, "loss_breakdown/pointer_loss": 2.5712199211120605, "step": 3910 }, { "epoch": 0.3987202202648787, "loss_breakdown/lm_loss": 8.904753485694528e-06, "loss_breakdown/pointer_loss": 0.17464590072631836, "step": 3910 }, { "epoch": 0.3987202202648787, "loss_breakdown/lm_loss": 1.3541901353164576e-05, "loss_breakdown/pointer_loss": 0.5515515804290771, "step": 3910 }, { "epoch": 0.3987202202648787, "loss_breakdown/lm_loss": 1.55507314048009e-05, "loss_breakdown/pointer_loss": 0.48730960488319397, "step": 3910 }, { "epoch": 0.3987202202648787, "loss_breakdown/lm_loss": 8.68627739691874e-06, "loss_breakdown/pointer_loss": 0.36600178480148315, "step": 3910 }, { "epoch": 0.3997399650737403, "grad_norm": 2.169033239844548, "learning_rate": 3.3354107648725214e-06, "loss": 0.3068, "step": 3920 }, { "epoch": 0.3997399650737403, "loss_breakdown/lm_loss": 1.161391719506355e-05, "loss_breakdown/pointer_loss": 0.33640575408935547, "step": 3920 }, { "epoch": 0.3997399650737403, "loss_breakdown/lm_loss": 8.374334356631152e-06, "loss_breakdown/pointer_loss": 0.6375123262405396, "step": 3920 }, { "epoch": 0.3997399650737403, "loss_breakdown/lm_loss": 8.86589532456128e-06, "loss_breakdown/pointer_loss": 0.5623233318328857, "step": 3920 }, { "epoch": 0.3997399650737403, "loss_breakdown/lm_loss": 1.2406151654431596e-05, "loss_breakdown/pointer_loss": 0.6512628197669983, "step": 3920 }, { "epoch": 0.3997399650737403, "loss_breakdown/lm_loss": 8.864440133038443e-06, "loss_breakdown/pointer_loss": 0.4587446451187134, "step": 3920 }, { "epoch": 0.3997399650737403, "loss_breakdown/lm_loss": 8.865549716574606e-06, "loss_breakdown/pointer_loss": 0.5653249025344849, "step": 3920 }, { "epoch": 0.3997399650737403, "loss_breakdown/lm_loss": 1.4440977793128695e-05, "loss_breakdown/pointer_loss": 1.056187391281128, "step": 3920 }, { "epoch": 0.3997399650737403, "loss_breakdown/lm_loss": 8.842701390676666e-06, "loss_breakdown/pointer_loss": 1.4535495042800903, "step": 3920 }, { "epoch": 0.4007597098826019, "grad_norm": 10.744673599711628, "learning_rate": 3.329745042492918e-06, "loss": 0.2724, "step": 3930 }, { "epoch": 0.4007597098826019, "loss_breakdown/lm_loss": 7.59749173084856e-06, "loss_breakdown/pointer_loss": 0.0482635572552681, "step": 3930 }, { "epoch": 0.4007597098826019, "loss_breakdown/lm_loss": 1.1757792890421115e-05, "loss_breakdown/pointer_loss": 1.5387134552001953, "step": 3930 }, { "epoch": 0.4007597098826019, "loss_breakdown/lm_loss": 9.763091838976834e-06, "loss_breakdown/pointer_loss": 0.1305072158575058, "step": 3930 }, { "epoch": 0.4007597098826019, "loss_breakdown/lm_loss": 8.789568710199092e-06, "loss_breakdown/pointer_loss": 0.48775678873062134, "step": 3930 }, { "epoch": 0.4007597098826019, "loss_breakdown/lm_loss": 1.1274275493633468e-05, "loss_breakdown/pointer_loss": 0.1396927833557129, "step": 3930 }, { "epoch": 0.4007597098826019, "loss_breakdown/lm_loss": 7.645182449778076e-06, "loss_breakdown/pointer_loss": 0.10987068712711334, "step": 3930 }, { "epoch": 0.4007597098826019, "loss_breakdown/lm_loss": 1.5572230040561408e-05, "loss_breakdown/pointer_loss": 0.21712899208068848, "step": 3930 }, { "epoch": 0.4007597098826019, "loss_breakdown/lm_loss": 9.31404883885989e-06, "loss_breakdown/pointer_loss": 0.36247044801712036, "step": 3930 }, { "epoch": 0.4017794546914635, "grad_norm": 4.742714977038985, "learning_rate": 3.324079320113315e-06, "loss": 0.3085, "step": 3940 }, { "epoch": 0.4017794546914635, "loss_breakdown/lm_loss": 1.0609432138153352e-05, "loss_breakdown/pointer_loss": 0.10533613711595535, "step": 3940 }, { "epoch": 0.4017794546914635, "loss_breakdown/lm_loss": 7.503634606109699e-06, "loss_breakdown/pointer_loss": 0.4123937487602234, "step": 3940 }, { "epoch": 0.4017794546914635, "loss_breakdown/lm_loss": 7.347741302510258e-06, "loss_breakdown/pointer_loss": 0.19694286584854126, "step": 3940 }, { "epoch": 0.4017794546914635, "loss_breakdown/lm_loss": 6.624979960179189e-06, "loss_breakdown/pointer_loss": 0.15174734592437744, "step": 3940 }, { "epoch": 0.4017794546914635, "loss_breakdown/lm_loss": 9.101475370698608e-06, "loss_breakdown/pointer_loss": 0.1208026260137558, "step": 3940 }, { "epoch": 0.4017794546914635, "loss_breakdown/lm_loss": 8.85611825651722e-06, "loss_breakdown/pointer_loss": 1.0042411088943481, "step": 3940 }, { "epoch": 0.4017794546914635, "loss_breakdown/lm_loss": 8.051243639783934e-06, "loss_breakdown/pointer_loss": 1.0453219413757324, "step": 3940 }, { "epoch": 0.4017794546914635, "loss_breakdown/lm_loss": 1.0026153177022934e-05, "loss_breakdown/pointer_loss": 0.5874142646789551, "step": 3940 }, { "epoch": 0.40279919950032506, "grad_norm": 13.137065133205292, "learning_rate": 3.3184135977337113e-06, "loss": 0.2871, "step": 3950 }, { "epoch": 0.40279919950032506, "loss_breakdown/lm_loss": 0.0001390849647577852, "loss_breakdown/pointer_loss": 1.1938451528549194, "step": 3950 }, { "epoch": 0.40279919950032506, "loss_breakdown/lm_loss": 1.8411121345707215e-05, "loss_breakdown/pointer_loss": 0.45235180854797363, "step": 3950 }, { "epoch": 0.40279919950032506, "loss_breakdown/lm_loss": 9.726648568175733e-05, "loss_breakdown/pointer_loss": 0.5664609670639038, "step": 3950 }, { "epoch": 0.40279919950032506, "loss_breakdown/lm_loss": 1.1969658771704417e-05, "loss_breakdown/pointer_loss": 0.5879086256027222, "step": 3950 }, { "epoch": 0.40279919950032506, "loss_breakdown/lm_loss": 1.0921702596533578e-05, "loss_breakdown/pointer_loss": 0.7772035598754883, "step": 3950 }, { "epoch": 0.40279919950032506, "loss_breakdown/lm_loss": 1.2212956789880991e-05, "loss_breakdown/pointer_loss": 0.6912158727645874, "step": 3950 }, { "epoch": 0.40279919950032506, "loss_breakdown/lm_loss": 1.4454007214226294e-05, "loss_breakdown/pointer_loss": 0.42574095726013184, "step": 3950 }, { "epoch": 0.40279919950032506, "loss_breakdown/lm_loss": 9.765173672349192e-06, "loss_breakdown/pointer_loss": 0.4600350558757782, "step": 3950 }, { "epoch": 0.40381894430918663, "grad_norm": 3.864326132395194, "learning_rate": 3.3127478753541082e-06, "loss": 0.2787, "step": 3960 }, { "epoch": 0.40381894430918663, "loss_breakdown/lm_loss": 1.117206375056412e-05, "loss_breakdown/pointer_loss": 0.5136234760284424, "step": 3960 }, { "epoch": 0.40381894430918663, "loss_breakdown/lm_loss": 1.1565016393433325e-05, "loss_breakdown/pointer_loss": 0.1298673152923584, "step": 3960 }, { "epoch": 0.40381894430918663, "loss_breakdown/lm_loss": 7.452475074387621e-06, "loss_breakdown/pointer_loss": 0.4020079970359802, "step": 3960 }, { "epoch": 0.40381894430918663, "loss_breakdown/lm_loss": 1.3664411198988091e-05, "loss_breakdown/pointer_loss": 1.0539954900741577, "step": 3960 }, { "epoch": 0.40381894430918663, "loss_breakdown/lm_loss": 9.818699254537933e-06, "loss_breakdown/pointer_loss": 0.4039512872695923, "step": 3960 }, { "epoch": 0.40381894430918663, "loss_breakdown/lm_loss": 1.067101038643159e-05, "loss_breakdown/pointer_loss": 0.27008339762687683, "step": 3960 }, { "epoch": 0.40381894430918663, "loss_breakdown/lm_loss": 1.0986829693138134e-05, "loss_breakdown/pointer_loss": 0.1457909494638443, "step": 3960 }, { "epoch": 0.40381894430918663, "loss_breakdown/lm_loss": 1.0005463991547003e-05, "loss_breakdown/pointer_loss": 0.633222222328186, "step": 3960 }, { "epoch": 0.4048386891180482, "grad_norm": 8.480206032968267, "learning_rate": 3.3070821529745043e-06, "loss": 0.3283, "step": 3970 }, { "epoch": 0.4048386891180482, "loss_breakdown/lm_loss": 1.2632203834073152e-05, "loss_breakdown/pointer_loss": 0.8634089231491089, "step": 3970 }, { "epoch": 0.4048386891180482, "loss_breakdown/lm_loss": 1.1822762644442264e-05, "loss_breakdown/pointer_loss": 1.3250705003738403, "step": 3970 }, { "epoch": 0.4048386891180482, "loss_breakdown/lm_loss": 9.810943993215915e-06, "loss_breakdown/pointer_loss": 0.4949970841407776, "step": 3970 }, { "epoch": 0.4048386891180482, "loss_breakdown/lm_loss": 1.2972716831427533e-05, "loss_breakdown/pointer_loss": 0.38700297474861145, "step": 3970 }, { "epoch": 0.4048386891180482, "loss_breakdown/lm_loss": 8.5370793385664e-06, "loss_breakdown/pointer_loss": 0.738153338432312, "step": 3970 }, { "epoch": 0.4048386891180482, "loss_breakdown/lm_loss": 8.173376954800915e-06, "loss_breakdown/pointer_loss": 0.2602406144142151, "step": 3970 }, { "epoch": 0.4048386891180482, "loss_breakdown/lm_loss": 1.6428859453299083e-05, "loss_breakdown/pointer_loss": 0.49601858854293823, "step": 3970 }, { "epoch": 0.4048386891180482, "loss_breakdown/lm_loss": 6.835721251263749e-06, "loss_breakdown/pointer_loss": 0.2533642649650574, "step": 3970 }, { "epoch": 0.4058584339269098, "grad_norm": 16.623335936780386, "learning_rate": 3.301416430594901e-06, "loss": 0.2757, "step": 3980 }, { "epoch": 0.4058584339269098, "loss_breakdown/lm_loss": 1.0943168490484823e-05, "loss_breakdown/pointer_loss": 0.14987388253211975, "step": 3980 }, { "epoch": 0.4058584339269098, "loss_breakdown/lm_loss": 9.75116290646838e-06, "loss_breakdown/pointer_loss": 0.1688666045665741, "step": 3980 }, { "epoch": 0.4058584339269098, "loss_breakdown/lm_loss": 1.1575047210499179e-05, "loss_breakdown/pointer_loss": 0.14141440391540527, "step": 3980 }, { "epoch": 0.4058584339269098, "loss_breakdown/lm_loss": 1.2186895219201688e-05, "loss_breakdown/pointer_loss": 0.09951648116111755, "step": 3980 }, { "epoch": 0.4058584339269098, "loss_breakdown/lm_loss": 1.6919088011491112e-05, "loss_breakdown/pointer_loss": 1.0969514846801758, "step": 3980 }, { "epoch": 0.4058584339269098, "loss_breakdown/lm_loss": 9.027970918396022e-06, "loss_breakdown/pointer_loss": 0.11950207501649857, "step": 3980 }, { "epoch": 0.4058584339269098, "loss_breakdown/lm_loss": 1.9501654605846852e-05, "loss_breakdown/pointer_loss": 0.22381356358528137, "step": 3980 }, { "epoch": 0.4058584339269098, "loss_breakdown/lm_loss": 1.2643803529499564e-05, "loss_breakdown/pointer_loss": 0.19527560472488403, "step": 3980 }, { "epoch": 0.40687817873577137, "grad_norm": 2.658508661525976, "learning_rate": 3.2957507082152978e-06, "loss": 0.3038, "step": 3990 }, { "epoch": 0.40687817873577137, "loss_breakdown/lm_loss": 9.004091225506272e-06, "loss_breakdown/pointer_loss": 0.35903143882751465, "step": 3990 }, { "epoch": 0.40687817873577137, "loss_breakdown/lm_loss": 7.974623258633073e-06, "loss_breakdown/pointer_loss": 0.5159913897514343, "step": 3990 }, { "epoch": 0.40687817873577137, "loss_breakdown/lm_loss": 1.3157839930499904e-05, "loss_breakdown/pointer_loss": 1.105216383934021, "step": 3990 }, { "epoch": 0.40687817873577137, "loss_breakdown/lm_loss": 9.633196896174923e-06, "loss_breakdown/pointer_loss": 0.15783065557479858, "step": 3990 }, { "epoch": 0.40687817873577137, "loss_breakdown/lm_loss": 1.108291053242283e-05, "loss_breakdown/pointer_loss": 0.19759601354599, "step": 3990 }, { "epoch": 0.40687817873577137, "loss_breakdown/lm_loss": 9.485800546826795e-06, "loss_breakdown/pointer_loss": 1.3714677095413208, "step": 3990 }, { "epoch": 0.40687817873577137, "loss_breakdown/lm_loss": 1.319402963417815e-05, "loss_breakdown/pointer_loss": 0.6542028188705444, "step": 3990 }, { "epoch": 0.40687817873577137, "loss_breakdown/lm_loss": 1.0565020602371078e-05, "loss_breakdown/pointer_loss": 0.6520317792892456, "step": 3990 }, { "epoch": 0.40789792354463295, "grad_norm": 11.174886036821917, "learning_rate": 3.2900849858356943e-06, "loss": 0.2578, "step": 4000 }, { "epoch": 0.40789792354463295, "loss_breakdown/lm_loss": 9.970692917704582e-05, "loss_breakdown/pointer_loss": 1.9837007522583008, "step": 4000 }, { "epoch": 0.40789792354463295, "loss_breakdown/lm_loss": 2.9878570785513148e-05, "loss_breakdown/pointer_loss": 0.3472197651863098, "step": 4000 }, { "epoch": 0.40789792354463295, "loss_breakdown/lm_loss": 1.3794655387755483e-05, "loss_breakdown/pointer_loss": 0.45761168003082275, "step": 4000 }, { "epoch": 0.40789792354463295, "loss_breakdown/lm_loss": 2.6265133783454075e-05, "loss_breakdown/pointer_loss": 0.9881061315536499, "step": 4000 }, { "epoch": 0.40789792354463295, "loss_breakdown/lm_loss": 1.6880234397831373e-05, "loss_breakdown/pointer_loss": 0.6453959345817566, "step": 4000 }, { "epoch": 0.40789792354463295, "loss_breakdown/lm_loss": 1.5298108337447047e-05, "loss_breakdown/pointer_loss": 0.682571291923523, "step": 4000 }, { "epoch": 0.40789792354463295, "loss_breakdown/lm_loss": 1.4316174201667309e-05, "loss_breakdown/pointer_loss": 0.8847769498825073, "step": 4000 }, { "epoch": 0.40789792354463295, "loss_breakdown/lm_loss": 3.594611553126015e-05, "loss_breakdown/pointer_loss": 0.6336143016815186, "step": 4000 }, { "epoch": 0.4089176683534945, "grad_norm": 3.289486496817027, "learning_rate": 3.284419263456091e-06, "loss": 0.2815, "step": 4010 }, { "epoch": 0.4089176683534945, "loss_breakdown/lm_loss": 9.001484613690991e-06, "loss_breakdown/pointer_loss": 0.09618856012821198, "step": 4010 }, { "epoch": 0.4089176683534945, "loss_breakdown/lm_loss": 9.302708349423483e-06, "loss_breakdown/pointer_loss": 0.1754348874092102, "step": 4010 }, { "epoch": 0.4089176683534945, "loss_breakdown/lm_loss": 9.556962140777614e-06, "loss_breakdown/pointer_loss": 0.2295849770307541, "step": 4010 }, { "epoch": 0.4089176683534945, "loss_breakdown/lm_loss": 1.597363007022068e-05, "loss_breakdown/pointer_loss": 0.04193894937634468, "step": 4010 }, { "epoch": 0.4089176683534945, "loss_breakdown/lm_loss": 8.161746336554643e-06, "loss_breakdown/pointer_loss": 0.15885791182518005, "step": 4010 }, { "epoch": 0.4089176683534945, "loss_breakdown/lm_loss": 1.060941031028051e-05, "loss_breakdown/pointer_loss": 0.43791207671165466, "step": 4010 }, { "epoch": 0.4089176683534945, "loss_breakdown/lm_loss": 9.651761502027512e-06, "loss_breakdown/pointer_loss": 0.058451198041439056, "step": 4010 }, { "epoch": 0.4089176683534945, "loss_breakdown/lm_loss": 1.2047823474858887e-05, "loss_breakdown/pointer_loss": 0.9386302828788757, "step": 4010 }, { "epoch": 0.4099374131623561, "grad_norm": 2.98864963822813, "learning_rate": 3.2787535410764877e-06, "loss": 0.3076, "step": 4020 }, { "epoch": 0.4099374131623561, "loss_breakdown/lm_loss": 8.817738489597104e-06, "loss_breakdown/pointer_loss": 0.495449036359787, "step": 4020 }, { "epoch": 0.4099374131623561, "loss_breakdown/lm_loss": 1.2506061466410756e-05, "loss_breakdown/pointer_loss": 0.4541187882423401, "step": 4020 }, { "epoch": 0.4099374131623561, "loss_breakdown/lm_loss": 1.0730447684181854e-05, "loss_breakdown/pointer_loss": 0.5222293138504028, "step": 4020 }, { "epoch": 0.4099374131623561, "loss_breakdown/lm_loss": 8.707324013812467e-06, "loss_breakdown/pointer_loss": 0.5074267983436584, "step": 4020 }, { "epoch": 0.4099374131623561, "loss_breakdown/lm_loss": 1.1336837815179024e-05, "loss_breakdown/pointer_loss": 0.4250584840774536, "step": 4020 }, { "epoch": 0.4099374131623561, "loss_breakdown/lm_loss": 1.663823059061542e-05, "loss_breakdown/pointer_loss": 1.2269327640533447, "step": 4020 }, { "epoch": 0.4099374131623561, "loss_breakdown/lm_loss": 9.561395927448757e-06, "loss_breakdown/pointer_loss": 0.5515661239624023, "step": 4020 }, { "epoch": 0.4099374131623561, "loss_breakdown/lm_loss": 8.23099890112644e-06, "loss_breakdown/pointer_loss": 0.26026657223701477, "step": 4020 }, { "epoch": 0.4109571579712177, "grad_norm": 13.069846292096186, "learning_rate": 3.2730878186968838e-06, "loss": 0.2629, "step": 4030 }, { "epoch": 0.4109571579712177, "loss_breakdown/lm_loss": 8.968369911599439e-06, "loss_breakdown/pointer_loss": 0.46562960743904114, "step": 4030 }, { "epoch": 0.4109571579712177, "loss_breakdown/lm_loss": 1.2536521353467833e-05, "loss_breakdown/pointer_loss": 0.15912599861621857, "step": 4030 }, { "epoch": 0.4109571579712177, "loss_breakdown/lm_loss": 1.707017327134963e-05, "loss_breakdown/pointer_loss": 0.09753202646970749, "step": 4030 }, { "epoch": 0.4109571579712177, "loss_breakdown/lm_loss": 1.0033268154074904e-05, "loss_breakdown/pointer_loss": 4.312438488006592, "step": 4030 }, { "epoch": 0.4109571579712177, "loss_breakdown/lm_loss": 8.865046766004525e-06, "loss_breakdown/pointer_loss": 0.41368332505226135, "step": 4030 }, { "epoch": 0.4109571579712177, "loss_breakdown/lm_loss": 6.989529538259376e-06, "loss_breakdown/pointer_loss": 0.18970322608947754, "step": 4030 }, { "epoch": 0.4109571579712177, "loss_breakdown/lm_loss": 9.143213901552372e-06, "loss_breakdown/pointer_loss": 0.0380702018737793, "step": 4030 }, { "epoch": 0.4109571579712177, "loss_breakdown/lm_loss": 9.043836143973749e-06, "loss_breakdown/pointer_loss": 0.0488932728767395, "step": 4030 }, { "epoch": 0.4119769027800793, "grad_norm": 2.9113019148657506, "learning_rate": 3.267422096317281e-06, "loss": 0.3251, "step": 4040 }, { "epoch": 0.4119769027800793, "loss_breakdown/lm_loss": 9.81979064818006e-06, "loss_breakdown/pointer_loss": 1.1843996047973633, "step": 4040 }, { "epoch": 0.4119769027800793, "loss_breakdown/lm_loss": 9.535638127999846e-06, "loss_breakdown/pointer_loss": 1.4933431148529053, "step": 4040 }, { "epoch": 0.4119769027800793, "loss_breakdown/lm_loss": 1.2322613656579051e-05, "loss_breakdown/pointer_loss": 0.2662346661090851, "step": 4040 }, { "epoch": 0.4119769027800793, "loss_breakdown/lm_loss": 8.744916158320848e-06, "loss_breakdown/pointer_loss": 0.24198240041732788, "step": 4040 }, { "epoch": 0.4119769027800793, "loss_breakdown/lm_loss": 9.054067049873993e-06, "loss_breakdown/pointer_loss": 0.32086098194122314, "step": 4040 }, { "epoch": 0.4119769027800793, "loss_breakdown/lm_loss": 1.2682450687862001e-05, "loss_breakdown/pointer_loss": 1.2011278867721558, "step": 4040 }, { "epoch": 0.4119769027800793, "loss_breakdown/lm_loss": 9.104807759285904e-06, "loss_breakdown/pointer_loss": 0.2338819056749344, "step": 4040 }, { "epoch": 0.4119769027800793, "loss_breakdown/lm_loss": 1.0599065717542544e-05, "loss_breakdown/pointer_loss": 0.2425653487443924, "step": 4040 }, { "epoch": 0.4129966475889409, "grad_norm": 4.3451169171272355, "learning_rate": 3.261756373937677e-06, "loss": 0.2824, "step": 4050 }, { "epoch": 0.4129966475889409, "loss_breakdown/lm_loss": 4.277214975445531e-05, "loss_breakdown/pointer_loss": 2.7878293991088867, "step": 4050 }, { "epoch": 0.4129966475889409, "loss_breakdown/lm_loss": 2.2203168555279262e-05, "loss_breakdown/pointer_loss": 0.9167288541793823, "step": 4050 }, { "epoch": 0.4129966475889409, "loss_breakdown/lm_loss": 1.853979119914584e-05, "loss_breakdown/pointer_loss": 0.6336870789527893, "step": 4050 }, { "epoch": 0.4129966475889409, "loss_breakdown/lm_loss": 1.3970825420983601e-05, "loss_breakdown/pointer_loss": 0.31905487179756165, "step": 4050 }, { "epoch": 0.4129966475889409, "loss_breakdown/lm_loss": 1.4220571756595746e-05, "loss_breakdown/pointer_loss": 1.1347222328186035, "step": 4050 }, { "epoch": 0.4129966475889409, "loss_breakdown/lm_loss": 1.5739262380520813e-05, "loss_breakdown/pointer_loss": 0.2763451039791107, "step": 4050 }, { "epoch": 0.4129966475889409, "loss_breakdown/lm_loss": 1.5040310245240107e-05, "loss_breakdown/pointer_loss": 0.42500150203704834, "step": 4050 }, { "epoch": 0.4129966475889409, "loss_breakdown/lm_loss": 1.2409957889758516e-05, "loss_breakdown/pointer_loss": 0.539251446723938, "step": 4050 }, { "epoch": 0.41401639239780247, "grad_norm": 8.043559845316041, "learning_rate": 3.2560906515580737e-06, "loss": 0.2771, "step": 4060 }, { "epoch": 0.41401639239780247, "loss_breakdown/lm_loss": 1.2023156159557402e-05, "loss_breakdown/pointer_loss": 0.4298068583011627, "step": 4060 }, { "epoch": 0.41401639239780247, "loss_breakdown/lm_loss": 1.2864189557149075e-05, "loss_breakdown/pointer_loss": 0.20072440803050995, "step": 4060 }, { "epoch": 0.41401639239780247, "loss_breakdown/lm_loss": 9.232784577761777e-06, "loss_breakdown/pointer_loss": 0.8717970848083496, "step": 4060 }, { "epoch": 0.41401639239780247, "loss_breakdown/lm_loss": 1.1702073607011698e-05, "loss_breakdown/pointer_loss": 0.1494942009449005, "step": 4060 }, { "epoch": 0.41401639239780247, "loss_breakdown/lm_loss": 9.000172212836333e-06, "loss_breakdown/pointer_loss": 0.3182673156261444, "step": 4060 }, { "epoch": 0.41401639239780247, "loss_breakdown/lm_loss": 7.944150638650171e-06, "loss_breakdown/pointer_loss": 0.18542124330997467, "step": 4060 }, { "epoch": 0.41401639239780247, "loss_breakdown/lm_loss": 9.161007255897857e-06, "loss_breakdown/pointer_loss": 0.28686001896858215, "step": 4060 }, { "epoch": 0.41401639239780247, "loss_breakdown/lm_loss": 7.724642273387872e-06, "loss_breakdown/pointer_loss": 0.42577069997787476, "step": 4060 }, { "epoch": 0.41503613720666405, "grad_norm": 3.0092081650732516, "learning_rate": 3.2504249291784706e-06, "loss": 0.2843, "step": 4070 }, { "epoch": 0.41503613720666405, "loss_breakdown/lm_loss": 7.580580586363794e-06, "loss_breakdown/pointer_loss": 0.34312689304351807, "step": 4070 }, { "epoch": 0.41503613720666405, "loss_breakdown/lm_loss": 1.3760743968305178e-05, "loss_breakdown/pointer_loss": 0.7622311115264893, "step": 4070 }, { "epoch": 0.41503613720666405, "loss_breakdown/lm_loss": 1.2239463103469461e-05, "loss_breakdown/pointer_loss": 1.2244824171066284, "step": 4070 }, { "epoch": 0.41503613720666405, "loss_breakdown/lm_loss": 8.652677024656441e-06, "loss_breakdown/pointer_loss": 0.7722011804580688, "step": 4070 }, { "epoch": 0.41503613720666405, "loss_breakdown/lm_loss": 1.069224344973918e-05, "loss_breakdown/pointer_loss": 0.26426181197166443, "step": 4070 }, { "epoch": 0.41503613720666405, "loss_breakdown/lm_loss": 9.06988134374842e-06, "loss_breakdown/pointer_loss": 0.6702137589454651, "step": 4070 }, { "epoch": 0.41503613720666405, "loss_breakdown/lm_loss": 9.172555110126268e-06, "loss_breakdown/pointer_loss": 0.7017078995704651, "step": 4070 }, { "epoch": 0.41503613720666405, "loss_breakdown/lm_loss": 9.306587344326545e-06, "loss_breakdown/pointer_loss": 0.20130445063114166, "step": 4070 }, { "epoch": 0.4160558820155256, "grad_norm": 11.951545870144587, "learning_rate": 3.244759206798867e-06, "loss": 0.2777, "step": 4080 }, { "epoch": 0.4160558820155256, "loss_breakdown/lm_loss": 8.93258948053699e-06, "loss_breakdown/pointer_loss": 0.4173539876937866, "step": 4080 }, { "epoch": 0.4160558820155256, "loss_breakdown/lm_loss": 1.2997440535400528e-05, "loss_breakdown/pointer_loss": 0.9901322722434998, "step": 4080 }, { "epoch": 0.4160558820155256, "loss_breakdown/lm_loss": 2.377239434281364e-05, "loss_breakdown/pointer_loss": 1.4597547054290771, "step": 4080 }, { "epoch": 0.4160558820155256, "loss_breakdown/lm_loss": 2.2939579139347188e-05, "loss_breakdown/pointer_loss": 1.9183170795440674, "step": 4080 }, { "epoch": 0.4160558820155256, "loss_breakdown/lm_loss": 9.834514457907062e-06, "loss_breakdown/pointer_loss": 0.20382845401763916, "step": 4080 }, { "epoch": 0.4160558820155256, "loss_breakdown/lm_loss": 9.85445331025403e-06, "loss_breakdown/pointer_loss": 5.780938148498535, "step": 4080 }, { "epoch": 0.4160558820155256, "loss_breakdown/lm_loss": 1.0891530109802261e-05, "loss_breakdown/pointer_loss": 0.1469089388847351, "step": 4080 }, { "epoch": 0.4160558820155256, "loss_breakdown/lm_loss": 2.323548869753722e-05, "loss_breakdown/pointer_loss": 0.5126416683197021, "step": 4080 }, { "epoch": 0.4170756268243872, "grad_norm": 4.60052852348565, "learning_rate": 3.239093484419264e-06, "loss": 0.2735, "step": 4090 }, { "epoch": 0.4170756268243872, "loss_breakdown/lm_loss": 9.34896343096625e-06, "loss_breakdown/pointer_loss": 0.2656800448894501, "step": 4090 }, { "epoch": 0.4170756268243872, "loss_breakdown/lm_loss": 9.453350685362238e-06, "loss_breakdown/pointer_loss": 0.13822069764137268, "step": 4090 }, { "epoch": 0.4170756268243872, "loss_breakdown/lm_loss": 9.583648534317035e-06, "loss_breakdown/pointer_loss": 0.3031346797943115, "step": 4090 }, { "epoch": 0.4170756268243872, "loss_breakdown/lm_loss": 1.0196080438618083e-05, "loss_breakdown/pointer_loss": 0.3273168206214905, "step": 4090 }, { "epoch": 0.4170756268243872, "loss_breakdown/lm_loss": 8.964330845628865e-06, "loss_breakdown/pointer_loss": 0.13631576299667358, "step": 4090 }, { "epoch": 0.4170756268243872, "loss_breakdown/lm_loss": 1.1868362889799755e-05, "loss_breakdown/pointer_loss": 1.0969667434692383, "step": 4090 }, { "epoch": 0.4170756268243872, "loss_breakdown/lm_loss": 1.031532883644104e-05, "loss_breakdown/pointer_loss": 0.1374640166759491, "step": 4090 }, { "epoch": 0.4170756268243872, "loss_breakdown/lm_loss": 9.50876983552007e-06, "loss_breakdown/pointer_loss": 0.566396951675415, "step": 4090 }, { "epoch": 0.4180953716332488, "grad_norm": 10.86463682148065, "learning_rate": 3.23342776203966e-06, "loss": 0.2779, "step": 4100 }, { "epoch": 0.4180953716332488, "loss_breakdown/lm_loss": 4.571395766106434e-05, "loss_breakdown/pointer_loss": 3.1396255493164062, "step": 4100 }, { "epoch": 0.4180953716332488, "loss_breakdown/lm_loss": 2.6686882847570814e-05, "loss_breakdown/pointer_loss": 0.36811500787734985, "step": 4100 }, { "epoch": 0.4180953716332488, "loss_breakdown/lm_loss": 2.237932858406566e-05, "loss_breakdown/pointer_loss": 0.8590116500854492, "step": 4100 }, { "epoch": 0.4180953716332488, "loss_breakdown/lm_loss": 1.3142815987521317e-05, "loss_breakdown/pointer_loss": 0.5634778141975403, "step": 4100 }, { "epoch": 0.4180953716332488, "loss_breakdown/lm_loss": 1.8276450646226294e-05, "loss_breakdown/pointer_loss": 0.7788138389587402, "step": 4100 }, { "epoch": 0.4180953716332488, "loss_breakdown/lm_loss": 1.0514375389902852e-05, "loss_breakdown/pointer_loss": 0.6789523959159851, "step": 4100 }, { "epoch": 0.4180953716332488, "loss_breakdown/lm_loss": 1.2643058653338812e-05, "loss_breakdown/pointer_loss": 1.4346638917922974, "step": 4100 }, { "epoch": 0.4180953716332488, "loss_breakdown/lm_loss": 1.1019812518497929e-05, "loss_breakdown/pointer_loss": 0.8818374872207642, "step": 4100 }, { "epoch": 0.41911511644211036, "grad_norm": 2.468715197885192, "learning_rate": 3.2277620396600566e-06, "loss": 0.272, "step": 4110 }, { "epoch": 0.41911511644211036, "loss_breakdown/lm_loss": 1.5149569662753493e-05, "loss_breakdown/pointer_loss": 0.09594960510730743, "step": 4110 }, { "epoch": 0.41911511644211036, "loss_breakdown/lm_loss": 7.163816917454824e-06, "loss_breakdown/pointer_loss": 0.12315213680267334, "step": 4110 }, { "epoch": 0.41911511644211036, "loss_breakdown/lm_loss": 9.102032890950795e-06, "loss_breakdown/pointer_loss": 0.07239216566085815, "step": 4110 }, { "epoch": 0.41911511644211036, "loss_breakdown/lm_loss": 9.366969607071951e-06, "loss_breakdown/pointer_loss": 0.3224281966686249, "step": 4110 }, { "epoch": 0.41911511644211036, "loss_breakdown/lm_loss": 8.55645521369297e-06, "loss_breakdown/pointer_loss": 0.12198871374130249, "step": 4110 }, { "epoch": 0.41911511644211036, "loss_breakdown/lm_loss": 1.678350417932961e-05, "loss_breakdown/pointer_loss": 0.2814081013202667, "step": 4110 }, { "epoch": 0.41911511644211036, "loss_breakdown/lm_loss": 1.1618606549745891e-05, "loss_breakdown/pointer_loss": 0.24853137135505676, "step": 4110 }, { "epoch": 0.41911511644211036, "loss_breakdown/lm_loss": 1.1966282727371436e-05, "loss_breakdown/pointer_loss": 0.21468782424926758, "step": 4110 }, { "epoch": 0.42013486125097194, "grad_norm": 8.969123473371496, "learning_rate": 3.2220963172804536e-06, "loss": 0.317, "step": 4120 }, { "epoch": 0.42013486125097194, "loss_breakdown/lm_loss": 1.2920343579025939e-05, "loss_breakdown/pointer_loss": 0.3388829827308655, "step": 4120 }, { "epoch": 0.42013486125097194, "loss_breakdown/lm_loss": 9.093856533581857e-06, "loss_breakdown/pointer_loss": 0.45018890500068665, "step": 4120 }, { "epoch": 0.42013486125097194, "loss_breakdown/lm_loss": 1.1203928806935437e-05, "loss_breakdown/pointer_loss": 0.29620885848999023, "step": 4120 }, { "epoch": 0.42013486125097194, "loss_breakdown/lm_loss": 1.1268857633695006e-05, "loss_breakdown/pointer_loss": 0.40373319387435913, "step": 4120 }, { "epoch": 0.42013486125097194, "loss_breakdown/lm_loss": 1.0016472515417263e-05, "loss_breakdown/pointer_loss": 0.24129292368888855, "step": 4120 }, { "epoch": 0.42013486125097194, "loss_breakdown/lm_loss": 8.34175352792954e-06, "loss_breakdown/pointer_loss": 0.9395109415054321, "step": 4120 }, { "epoch": 0.42013486125097194, "loss_breakdown/lm_loss": 8.76635021995753e-06, "loss_breakdown/pointer_loss": 0.4091317057609558, "step": 4120 }, { "epoch": 0.42013486125097194, "loss_breakdown/lm_loss": 8.419997357123066e-06, "loss_breakdown/pointer_loss": 0.09729675948619843, "step": 4120 }, { "epoch": 0.4211546060598335, "grad_norm": 7.497869653021472, "learning_rate": 3.21643059490085e-06, "loss": 0.2608, "step": 4130 }, { "epoch": 0.4211546060598335, "loss_breakdown/lm_loss": 2.2345433535519987e-05, "loss_breakdown/pointer_loss": 0.23211921751499176, "step": 4130 }, { "epoch": 0.4211546060598335, "loss_breakdown/lm_loss": 2.7427538952906616e-05, "loss_breakdown/pointer_loss": 0.06460142135620117, "step": 4130 }, { "epoch": 0.4211546060598335, "loss_breakdown/lm_loss": 1.0347173883928917e-05, "loss_breakdown/pointer_loss": 0.26270124316215515, "step": 4130 }, { "epoch": 0.4211546060598335, "loss_breakdown/lm_loss": 2.088789733534213e-05, "loss_breakdown/pointer_loss": 2.2599313259124756, "step": 4130 }, { "epoch": 0.4211546060598335, "loss_breakdown/lm_loss": 9.898156349663623e-06, "loss_breakdown/pointer_loss": 2.638780117034912, "step": 4130 }, { "epoch": 0.4211546060598335, "loss_breakdown/lm_loss": 3.872082015732303e-05, "loss_breakdown/pointer_loss": 1.0754424333572388, "step": 4130 }, { "epoch": 0.4211546060598335, "loss_breakdown/lm_loss": 4.353926487965509e-05, "loss_breakdown/pointer_loss": 1.3554372787475586, "step": 4130 }, { "epoch": 0.4211546060598335, "loss_breakdown/lm_loss": 1.940964284585789e-05, "loss_breakdown/pointer_loss": 0.25482913851737976, "step": 4130 }, { "epoch": 0.4221743508686951, "grad_norm": 2.9046410351572023, "learning_rate": 3.2107648725212466e-06, "loss": 0.3088, "step": 4140 }, { "epoch": 0.4221743508686951, "loss_breakdown/lm_loss": 7.4274284997954965e-06, "loss_breakdown/pointer_loss": 0.6371613144874573, "step": 4140 }, { "epoch": 0.4221743508686951, "loss_breakdown/lm_loss": 7.377625479421113e-06, "loss_breakdown/pointer_loss": 0.26532095670700073, "step": 4140 }, { "epoch": 0.4221743508686951, "loss_breakdown/lm_loss": 1.035877357935533e-05, "loss_breakdown/pointer_loss": 0.5778339505195618, "step": 4140 }, { "epoch": 0.4221743508686951, "loss_breakdown/lm_loss": 9.701434464659542e-06, "loss_breakdown/pointer_loss": 0.09468626976013184, "step": 4140 }, { "epoch": 0.4221743508686951, "loss_breakdown/lm_loss": 7.3972241807496175e-06, "loss_breakdown/pointer_loss": 0.6270560622215271, "step": 4140 }, { "epoch": 0.4221743508686951, "loss_breakdown/lm_loss": 8.404110303672496e-06, "loss_breakdown/pointer_loss": 0.33997559547424316, "step": 4140 }, { "epoch": 0.4221743508686951, "loss_breakdown/lm_loss": 7.533917141699931e-06, "loss_breakdown/pointer_loss": 0.7775875329971313, "step": 4140 }, { "epoch": 0.4221743508686951, "loss_breakdown/lm_loss": 8.649467417853884e-06, "loss_breakdown/pointer_loss": 0.979729175567627, "step": 4140 }, { "epoch": 0.42319409567755667, "grad_norm": 7.533740158539046, "learning_rate": 3.2050991501416435e-06, "loss": 0.2612, "step": 4150 }, { "epoch": 0.42319409567755667, "loss_breakdown/lm_loss": 2.4236245735664852e-05, "loss_breakdown/pointer_loss": 1.1920878887176514, "step": 4150 }, { "epoch": 0.42319409567755667, "loss_breakdown/lm_loss": 1.520007754152175e-05, "loss_breakdown/pointer_loss": 0.7555503845214844, "step": 4150 }, { "epoch": 0.42319409567755667, "loss_breakdown/lm_loss": 1.5655083188903518e-05, "loss_breakdown/pointer_loss": 0.6603716015815735, "step": 4150 }, { "epoch": 0.42319409567755667, "loss_breakdown/lm_loss": 1.2608962606464047e-05, "loss_breakdown/pointer_loss": 0.49924707412719727, "step": 4150 }, { "epoch": 0.42319409567755667, "loss_breakdown/lm_loss": 9.831623174250126e-06, "loss_breakdown/pointer_loss": 0.20446527004241943, "step": 4150 }, { "epoch": 0.42319409567755667, "loss_breakdown/lm_loss": 1.0565086995484307e-05, "loss_breakdown/pointer_loss": 0.6895197629928589, "step": 4150 }, { "epoch": 0.42319409567755667, "loss_breakdown/lm_loss": 8.528941179974936e-06, "loss_breakdown/pointer_loss": 0.6660561561584473, "step": 4150 }, { "epoch": 0.42319409567755667, "loss_breakdown/lm_loss": 8.034822712943424e-06, "loss_breakdown/pointer_loss": 1.1306252479553223, "step": 4150 }, { "epoch": 0.42421384048641825, "grad_norm": 3.885233587428212, "learning_rate": 3.1994334277620396e-06, "loss": 0.2705, "step": 4160 }, { "epoch": 0.42421384048641825, "loss_breakdown/lm_loss": 8.108750989777036e-06, "loss_breakdown/pointer_loss": 0.08466868102550507, "step": 4160 }, { "epoch": 0.42421384048641825, "loss_breakdown/lm_loss": 1.416341183357872e-05, "loss_breakdown/pointer_loss": 0.172592431306839, "step": 4160 }, { "epoch": 0.42421384048641825, "loss_breakdown/lm_loss": 1.3916774150857236e-05, "loss_breakdown/pointer_loss": 0.6375950574874878, "step": 4160 }, { "epoch": 0.42421384048641825, "loss_breakdown/lm_loss": 1.0347198440285865e-05, "loss_breakdown/pointer_loss": 0.5281926393508911, "step": 4160 }, { "epoch": 0.42421384048641825, "loss_breakdown/lm_loss": 1.0747089618234895e-05, "loss_breakdown/pointer_loss": 0.4560220539569855, "step": 4160 }, { "epoch": 0.42421384048641825, "loss_breakdown/lm_loss": 9.520677849650383e-06, "loss_breakdown/pointer_loss": 0.22559669613838196, "step": 4160 }, { "epoch": 0.42421384048641825, "loss_breakdown/lm_loss": 1.5373643691418692e-05, "loss_breakdown/pointer_loss": 1.9942575693130493, "step": 4160 }, { "epoch": 0.42421384048641825, "loss_breakdown/lm_loss": 1.0696802746679168e-05, "loss_breakdown/pointer_loss": 0.09540436416864395, "step": 4160 }, { "epoch": 0.4252335852952799, "grad_norm": 3.2868340266479894, "learning_rate": 3.193767705382437e-06, "loss": 0.2992, "step": 4170 }, { "epoch": 0.4252335852952799, "loss_breakdown/lm_loss": 1.2988629350729752e-05, "loss_breakdown/pointer_loss": 0.4217461943626404, "step": 4170 }, { "epoch": 0.4252335852952799, "loss_breakdown/lm_loss": 1.2212663023092318e-05, "loss_breakdown/pointer_loss": 0.5191469788551331, "step": 4170 }, { "epoch": 0.4252335852952799, "loss_breakdown/lm_loss": 9.477506864641327e-06, "loss_breakdown/pointer_loss": 0.33796054124832153, "step": 4170 }, { "epoch": 0.4252335852952799, "loss_breakdown/lm_loss": 8.610925760876853e-06, "loss_breakdown/pointer_loss": 0.22100113332271576, "step": 4170 }, { "epoch": 0.4252335852952799, "loss_breakdown/lm_loss": 1.0675577868823893e-05, "loss_breakdown/pointer_loss": 0.36707833409309387, "step": 4170 }, { "epoch": 0.4252335852952799, "loss_breakdown/lm_loss": 1.5161901501414832e-05, "loss_breakdown/pointer_loss": 0.4628744423389435, "step": 4170 }, { "epoch": 0.4252335852952799, "loss_breakdown/lm_loss": 8.535572305845562e-06, "loss_breakdown/pointer_loss": 0.19078172743320465, "step": 4170 }, { "epoch": 0.4252335852952799, "loss_breakdown/lm_loss": 8.575890205975156e-06, "loss_breakdown/pointer_loss": 0.4537292718887329, "step": 4170 }, { "epoch": 0.42625333010414146, "grad_norm": 9.320191296722914, "learning_rate": 3.188101983002833e-06, "loss": 0.2687, "step": 4180 }, { "epoch": 0.42625333010414146, "loss_breakdown/lm_loss": 1.0803927580127493e-05, "loss_breakdown/pointer_loss": 0.10773463547229767, "step": 4180 }, { "epoch": 0.42625333010414146, "loss_breakdown/lm_loss": 1.1697993613779545e-05, "loss_breakdown/pointer_loss": 2.6776084899902344, "step": 4180 }, { "epoch": 0.42625333010414146, "loss_breakdown/lm_loss": 1.0776208910101559e-05, "loss_breakdown/pointer_loss": 3.4214494228363037, "step": 4180 }, { "epoch": 0.42625333010414146, "loss_breakdown/lm_loss": 2.0164112356724218e-05, "loss_breakdown/pointer_loss": 0.06523075699806213, "step": 4180 }, { "epoch": 0.42625333010414146, "loss_breakdown/lm_loss": 8.265050382760819e-06, "loss_breakdown/pointer_loss": 0.0780298039317131, "step": 4180 }, { "epoch": 0.42625333010414146, "loss_breakdown/lm_loss": 7.986898708622903e-06, "loss_breakdown/pointer_loss": 0.14880278706550598, "step": 4180 }, { "epoch": 0.42625333010414146, "loss_breakdown/lm_loss": 1.2111048818042036e-05, "loss_breakdown/pointer_loss": 0.18854568898677826, "step": 4180 }, { "epoch": 0.42625333010414146, "loss_breakdown/lm_loss": 3.248577195336111e-05, "loss_breakdown/pointer_loss": 0.20936957001686096, "step": 4180 }, { "epoch": 0.42727307491300304, "grad_norm": 8.380090864579431, "learning_rate": 3.1824362606232295e-06, "loss": 0.3037, "step": 4190 }, { "epoch": 0.42727307491300304, "loss_breakdown/lm_loss": 2.1360561731853522e-05, "loss_breakdown/pointer_loss": 0.1607651710510254, "step": 4190 }, { "epoch": 0.42727307491300304, "loss_breakdown/lm_loss": 1.0547644706093706e-05, "loss_breakdown/pointer_loss": 1.454010009765625, "step": 4190 }, { "epoch": 0.42727307491300304, "loss_breakdown/lm_loss": 7.058852588670561e-06, "loss_breakdown/pointer_loss": 0.12252289056777954, "step": 4190 }, { "epoch": 0.42727307491300304, "loss_breakdown/lm_loss": 8.277218512375839e-06, "loss_breakdown/pointer_loss": 0.411910742521286, "step": 4190 }, { "epoch": 0.42727307491300304, "loss_breakdown/lm_loss": 8.887675903679337e-06, "loss_breakdown/pointer_loss": 0.19248133897781372, "step": 4190 }, { "epoch": 0.42727307491300304, "loss_breakdown/lm_loss": 9.074768968275748e-06, "loss_breakdown/pointer_loss": 0.20230773091316223, "step": 4190 }, { "epoch": 0.42727307491300304, "loss_breakdown/lm_loss": 1.0513876077311579e-05, "loss_breakdown/pointer_loss": 0.1791810840368271, "step": 4190 }, { "epoch": 0.42727307491300304, "loss_breakdown/lm_loss": 9.91924935078714e-06, "loss_breakdown/pointer_loss": 0.16895711421966553, "step": 4190 }, { "epoch": 0.4282928197218646, "grad_norm": 23.83092005870449, "learning_rate": 3.1767705382436265e-06, "loss": 0.2676, "step": 4200 }, { "epoch": 0.4282928197218646, "loss_breakdown/lm_loss": 5.482864798977971e-05, "loss_breakdown/pointer_loss": 1.8374240398406982, "step": 4200 }, { "epoch": 0.4282928197218646, "loss_breakdown/lm_loss": 1.807860280678142e-05, "loss_breakdown/pointer_loss": 0.4629170894622803, "step": 4200 }, { "epoch": 0.4282928197218646, "loss_breakdown/lm_loss": 1.676023384789005e-05, "loss_breakdown/pointer_loss": 0.5776526927947998, "step": 4200 }, { "epoch": 0.4282928197218646, "loss_breakdown/lm_loss": 1.4389617717824876e-05, "loss_breakdown/pointer_loss": 0.5306105017662048, "step": 4200 }, { "epoch": 0.4282928197218646, "loss_breakdown/lm_loss": 1.2177151802461594e-05, "loss_breakdown/pointer_loss": 0.6828509569168091, "step": 4200 }, { "epoch": 0.4282928197218646, "loss_breakdown/lm_loss": 1.86313918675296e-05, "loss_breakdown/pointer_loss": 0.29872989654541016, "step": 4200 }, { "epoch": 0.4282928197218646, "loss_breakdown/lm_loss": 1.0346300769015215e-05, "loss_breakdown/pointer_loss": 0.569311261177063, "step": 4200 }, { "epoch": 0.4282928197218646, "loss_breakdown/lm_loss": 1.2967478141945321e-05, "loss_breakdown/pointer_loss": 0.7719045877456665, "step": 4200 }, { "epoch": 0.4293125645307262, "grad_norm": 5.054897608397296, "learning_rate": 3.171104815864023e-06, "loss": 0.28, "step": 4210 }, { "epoch": 0.4293125645307262, "loss_breakdown/lm_loss": 9.812728421820793e-06, "loss_breakdown/pointer_loss": 0.2315784990787506, "step": 4210 }, { "epoch": 0.4293125645307262, "loss_breakdown/lm_loss": 9.178960681310855e-06, "loss_breakdown/pointer_loss": 0.13084742426872253, "step": 4210 }, { "epoch": 0.4293125645307262, "loss_breakdown/lm_loss": 9.639868039812427e-06, "loss_breakdown/pointer_loss": 0.30947861075401306, "step": 4210 }, { "epoch": 0.4293125645307262, "loss_breakdown/lm_loss": 1.4288480997493025e-05, "loss_breakdown/pointer_loss": 0.12193670868873596, "step": 4210 }, { "epoch": 0.4293125645307262, "loss_breakdown/lm_loss": 1.0849793397937901e-05, "loss_breakdown/pointer_loss": 0.4198794364929199, "step": 4210 }, { "epoch": 0.4293125645307262, "loss_breakdown/lm_loss": 1.2826568308810238e-05, "loss_breakdown/pointer_loss": 1.59153413772583, "step": 4210 }, { "epoch": 0.4293125645307262, "loss_breakdown/lm_loss": 8.660428647999652e-06, "loss_breakdown/pointer_loss": 0.07628633081912994, "step": 4210 }, { "epoch": 0.4293125645307262, "loss_breakdown/lm_loss": 8.161741789081134e-06, "loss_breakdown/pointer_loss": 0.19977931678295135, "step": 4210 }, { "epoch": 0.43033230933958777, "grad_norm": 3.855863333040208, "learning_rate": 3.165439093484419e-06, "loss": 0.2983, "step": 4220 }, { "epoch": 0.43033230933958777, "loss_breakdown/lm_loss": 1.361326030746568e-05, "loss_breakdown/pointer_loss": 0.3050106465816498, "step": 4220 }, { "epoch": 0.43033230933958777, "loss_breakdown/lm_loss": 1.163325214292854e-05, "loss_breakdown/pointer_loss": 0.4158395826816559, "step": 4220 }, { "epoch": 0.43033230933958777, "loss_breakdown/lm_loss": 9.512687029200606e-06, "loss_breakdown/pointer_loss": 0.3130995035171509, "step": 4220 }, { "epoch": 0.43033230933958777, "loss_breakdown/lm_loss": 3.1284234864870086e-05, "loss_breakdown/pointer_loss": 1.3002455234527588, "step": 4220 }, { "epoch": 0.43033230933958777, "loss_breakdown/lm_loss": 7.89900695963297e-06, "loss_breakdown/pointer_loss": 0.49994295835494995, "step": 4220 }, { "epoch": 0.43033230933958777, "loss_breakdown/lm_loss": 9.37113418331137e-06, "loss_breakdown/pointer_loss": 0.4551016092300415, "step": 4220 }, { "epoch": 0.43033230933958777, "loss_breakdown/lm_loss": 1.7952041162061505e-05, "loss_breakdown/pointer_loss": 0.1995261311531067, "step": 4220 }, { "epoch": 0.43033230933958777, "loss_breakdown/lm_loss": 7.025316790532088e-06, "loss_breakdown/pointer_loss": 0.5621317625045776, "step": 4220 }, { "epoch": 0.43135205414844935, "grad_norm": 18.938325730170867, "learning_rate": 3.1597733711048164e-06, "loss": 0.2615, "step": 4230 }, { "epoch": 0.43135205414844935, "loss_breakdown/lm_loss": 7.553798241133336e-06, "loss_breakdown/pointer_loss": 2.351961612701416, "step": 4230 }, { "epoch": 0.43135205414844935, "loss_breakdown/lm_loss": 9.465054972679354e-06, "loss_breakdown/pointer_loss": 0.21562626957893372, "step": 4230 }, { "epoch": 0.43135205414844935, "loss_breakdown/lm_loss": 8.880828318069689e-06, "loss_breakdown/pointer_loss": 0.21783310174942017, "step": 4230 }, { "epoch": 0.43135205414844935, "loss_breakdown/lm_loss": 2.135158865712583e-05, "loss_breakdown/pointer_loss": 0.12218964099884033, "step": 4230 }, { "epoch": 0.43135205414844935, "loss_breakdown/lm_loss": 4.1117942600976676e-05, "loss_breakdown/pointer_loss": 0.5267432332038879, "step": 4230 }, { "epoch": 0.43135205414844935, "loss_breakdown/lm_loss": 7.772317076160107e-06, "loss_breakdown/pointer_loss": 0.04139360412955284, "step": 4230 }, { "epoch": 0.43135205414844935, "loss_breakdown/lm_loss": 1.0080900210596155e-05, "loss_breakdown/pointer_loss": 1.3249917030334473, "step": 4230 }, { "epoch": 0.43135205414844935, "loss_breakdown/lm_loss": 1.2476857591536827e-05, "loss_breakdown/pointer_loss": 0.25251126289367676, "step": 4230 }, { "epoch": 0.4323717989573109, "grad_norm": 3.802486743519012, "learning_rate": 3.1541076487252125e-06, "loss": 0.3055, "step": 4240 }, { "epoch": 0.4323717989573109, "loss_breakdown/lm_loss": 7.63296975492267e-06, "loss_breakdown/pointer_loss": 0.5485297441482544, "step": 4240 }, { "epoch": 0.4323717989573109, "loss_breakdown/lm_loss": 8.598829481343273e-06, "loss_breakdown/pointer_loss": 0.07228921353816986, "step": 4240 }, { "epoch": 0.4323717989573109, "loss_breakdown/lm_loss": 1.2543871889647562e-05, "loss_breakdown/pointer_loss": 0.5595079064369202, "step": 4240 }, { "epoch": 0.4323717989573109, "loss_breakdown/lm_loss": 9.5722416517674e-06, "loss_breakdown/pointer_loss": 0.20720292627811432, "step": 4240 }, { "epoch": 0.4323717989573109, "loss_breakdown/lm_loss": 6.600590040761745e-06, "loss_breakdown/pointer_loss": 0.35893499851226807, "step": 4240 }, { "epoch": 0.4323717989573109, "loss_breakdown/lm_loss": 9.353784662380349e-06, "loss_breakdown/pointer_loss": 0.19316333532333374, "step": 4240 }, { "epoch": 0.4323717989573109, "loss_breakdown/lm_loss": 1.0470302186149638e-05, "loss_breakdown/pointer_loss": 0.50960373878479, "step": 4240 }, { "epoch": 0.4323717989573109, "loss_breakdown/lm_loss": 8.606739356764592e-06, "loss_breakdown/pointer_loss": 0.28133624792099, "step": 4240 }, { "epoch": 0.4333915437661725, "grad_norm": 15.544418012712942, "learning_rate": 3.1484419263456094e-06, "loss": 0.2698, "step": 4250 }, { "epoch": 0.4333915437661725, "loss_breakdown/lm_loss": 2.194827357016038e-05, "loss_breakdown/pointer_loss": 0.5916727185249329, "step": 4250 }, { "epoch": 0.4333915437661725, "loss_breakdown/lm_loss": 1.3355746887100395e-05, "loss_breakdown/pointer_loss": 0.4780535101890564, "step": 4250 }, { "epoch": 0.4333915437661725, "loss_breakdown/lm_loss": 1.4131247553450521e-05, "loss_breakdown/pointer_loss": 1.1968066692352295, "step": 4250 }, { "epoch": 0.4333915437661725, "loss_breakdown/lm_loss": 2.198373658757191e-05, "loss_breakdown/pointer_loss": 0.6657360792160034, "step": 4250 }, { "epoch": 0.4333915437661725, "loss_breakdown/lm_loss": 1.423128560418263e-05, "loss_breakdown/pointer_loss": 0.6153050661087036, "step": 4250 }, { "epoch": 0.4333915437661725, "loss_breakdown/lm_loss": 1.3997866517456714e-05, "loss_breakdown/pointer_loss": 0.6051293015480042, "step": 4250 }, { "epoch": 0.4333915437661725, "loss_breakdown/lm_loss": 9.618921467335895e-06, "loss_breakdown/pointer_loss": 0.793594241142273, "step": 4250 }, { "epoch": 0.4333915437661725, "loss_breakdown/lm_loss": 8.677114237798378e-06, "loss_breakdown/pointer_loss": 0.53191077709198, "step": 4250 }, { "epoch": 0.4344112885750341, "grad_norm": 7.127134654001616, "learning_rate": 3.142776203966006e-06, "loss": 0.2713, "step": 4260 }, { "epoch": 0.4344112885750341, "loss_breakdown/lm_loss": 8.80146399140358e-06, "loss_breakdown/pointer_loss": 0.3376176953315735, "step": 4260 }, { "epoch": 0.4344112885750341, "loss_breakdown/lm_loss": 8.449806955468375e-06, "loss_breakdown/pointer_loss": 0.10613428056240082, "step": 4260 }, { "epoch": 0.4344112885750341, "loss_breakdown/lm_loss": 8.16175543150166e-06, "loss_breakdown/pointer_loss": 0.16985031962394714, "step": 4260 }, { "epoch": 0.4344112885750341, "loss_breakdown/lm_loss": 1.5281715604942292e-05, "loss_breakdown/pointer_loss": 0.16193869709968567, "step": 4260 }, { "epoch": 0.4344112885750341, "loss_breakdown/lm_loss": 9.188476724375505e-06, "loss_breakdown/pointer_loss": 0.12895750999450684, "step": 4260 }, { "epoch": 0.4344112885750341, "loss_breakdown/lm_loss": 1.0835842658707406e-05, "loss_breakdown/pointer_loss": 0.21423953771591187, "step": 4260 }, { "epoch": 0.4344112885750341, "loss_breakdown/lm_loss": 1.463044281990733e-05, "loss_breakdown/pointer_loss": 0.34358304738998413, "step": 4260 }, { "epoch": 0.4344112885750341, "loss_breakdown/lm_loss": 1.0637229024723638e-05, "loss_breakdown/pointer_loss": 0.16780173778533936, "step": 4260 }, { "epoch": 0.43543103338389566, "grad_norm": 5.302496957967401, "learning_rate": 3.1371104815864024e-06, "loss": 0.2824, "step": 4270 }, { "epoch": 0.43543103338389566, "loss_breakdown/lm_loss": 9.118383786699269e-06, "loss_breakdown/pointer_loss": 0.1127605140209198, "step": 4270 }, { "epoch": 0.43543103338389566, "loss_breakdown/lm_loss": 1.949464058270678e-05, "loss_breakdown/pointer_loss": 0.3201240301132202, "step": 4270 }, { "epoch": 0.43543103338389566, "loss_breakdown/lm_loss": 9.716579370433465e-06, "loss_breakdown/pointer_loss": 0.32014375925064087, "step": 4270 }, { "epoch": 0.43543103338389566, "loss_breakdown/lm_loss": 1.0148482033400796e-05, "loss_breakdown/pointer_loss": 0.33396589756011963, "step": 4270 }, { "epoch": 0.43543103338389566, "loss_breakdown/lm_loss": 6.968748039071215e-06, "loss_breakdown/pointer_loss": 0.6036343574523926, "step": 4270 }, { "epoch": 0.43543103338389566, "loss_breakdown/lm_loss": 7.891976565588266e-06, "loss_breakdown/pointer_loss": 0.18211603164672852, "step": 4270 }, { "epoch": 0.43543103338389566, "loss_breakdown/lm_loss": 1.1314224138914142e-05, "loss_breakdown/pointer_loss": 1.2963309288024902, "step": 4270 }, { "epoch": 0.43543103338389566, "loss_breakdown/lm_loss": 7.65788081480423e-06, "loss_breakdown/pointer_loss": 0.521571159362793, "step": 4270 }, { "epoch": 0.43645077819275724, "grad_norm": 117.78383019851029, "learning_rate": 3.1314447592067993e-06, "loss": 0.2613, "step": 4280 }, { "epoch": 0.43645077819275724, "loss_breakdown/lm_loss": 9.862370461632963e-06, "loss_breakdown/pointer_loss": 0.33262860774993896, "step": 4280 }, { "epoch": 0.43645077819275724, "loss_breakdown/lm_loss": 1.0152345566893928e-05, "loss_breakdown/pointer_loss": 2.224595069885254, "step": 4280 }, { "epoch": 0.43645077819275724, "loss_breakdown/lm_loss": 1.1948408427997492e-05, "loss_breakdown/pointer_loss": 1.8353538513183594, "step": 4280 }, { "epoch": 0.43645077819275724, "loss_breakdown/lm_loss": 9.341898476122878e-06, "loss_breakdown/pointer_loss": 0.25691890716552734, "step": 4280 }, { "epoch": 0.43645077819275724, "loss_breakdown/lm_loss": 1.21390912681818e-05, "loss_breakdown/pointer_loss": 2.709369659423828, "step": 4280 }, { "epoch": 0.43645077819275724, "loss_breakdown/lm_loss": 1.3299034435476642e-05, "loss_breakdown/pointer_loss": 0.08822935819625854, "step": 4280 }, { "epoch": 0.43645077819275724, "loss_breakdown/lm_loss": 1.3112321539665572e-05, "loss_breakdown/pointer_loss": 0.11209005862474442, "step": 4280 }, { "epoch": 0.43645077819275724, "loss_breakdown/lm_loss": 1.2449145287973806e-05, "loss_breakdown/pointer_loss": 0.14626795053482056, "step": 4280 }, { "epoch": 0.43747052300161887, "grad_norm": 8.52668051149029, "learning_rate": 3.125779036827196e-06, "loss": 0.3081, "step": 4290 }, { "epoch": 0.43747052300161887, "loss_breakdown/lm_loss": 8.278299901576247e-06, "loss_breakdown/pointer_loss": 0.16971249878406525, "step": 4290 }, { "epoch": 0.43747052300161887, "loss_breakdown/lm_loss": 8.142506885633338e-06, "loss_breakdown/pointer_loss": 0.43471258878707886, "step": 4290 }, { "epoch": 0.43747052300161887, "loss_breakdown/lm_loss": 9.261032573704142e-06, "loss_breakdown/pointer_loss": 0.39536502957344055, "step": 4290 }, { "epoch": 0.43747052300161887, "loss_breakdown/lm_loss": 7.254438969539478e-06, "loss_breakdown/pointer_loss": 0.25235113501548767, "step": 4290 }, { "epoch": 0.43747052300161887, "loss_breakdown/lm_loss": 1.870455889729783e-05, "loss_breakdown/pointer_loss": 0.2856811285018921, "step": 4290 }, { "epoch": 0.43747052300161887, "loss_breakdown/lm_loss": 1.3674894944415428e-05, "loss_breakdown/pointer_loss": 0.2729233205318451, "step": 4290 }, { "epoch": 0.43747052300161887, "loss_breakdown/lm_loss": 6.845515144959791e-06, "loss_breakdown/pointer_loss": 0.42718571424484253, "step": 4290 }, { "epoch": 0.43747052300161887, "loss_breakdown/lm_loss": 1.0496079084987286e-05, "loss_breakdown/pointer_loss": 0.22850175201892853, "step": 4290 }, { "epoch": 0.43849026781048045, "grad_norm": 9.453146139997918, "learning_rate": 3.1201133144475928e-06, "loss": 0.273, "step": 4300 }, { "epoch": 0.43849026781048045, "loss_breakdown/lm_loss": 4.762836033478379e-05, "loss_breakdown/pointer_loss": 2.252573013305664, "step": 4300 }, { "epoch": 0.43849026781048045, "loss_breakdown/lm_loss": 2.0687884898507036e-05, "loss_breakdown/pointer_loss": 0.5031719207763672, "step": 4300 }, { "epoch": 0.43849026781048045, "loss_breakdown/lm_loss": 1.534996772534214e-05, "loss_breakdown/pointer_loss": 0.7577135562896729, "step": 4300 }, { "epoch": 0.43849026781048045, "loss_breakdown/lm_loss": 1.3166734788683243e-05, "loss_breakdown/pointer_loss": 0.6794236302375793, "step": 4300 }, { "epoch": 0.43849026781048045, "loss_breakdown/lm_loss": 1.5374700524262153e-05, "loss_breakdown/pointer_loss": 1.1096141338348389, "step": 4300 }, { "epoch": 0.43849026781048045, "loss_breakdown/lm_loss": 1.6027508536353707e-05, "loss_breakdown/pointer_loss": 0.4375678300857544, "step": 4300 }, { "epoch": 0.43849026781048045, "loss_breakdown/lm_loss": 1.2747456821671221e-05, "loss_breakdown/pointer_loss": 0.7887189388275146, "step": 4300 }, { "epoch": 0.43849026781048045, "loss_breakdown/lm_loss": 9.741468602442183e-06, "loss_breakdown/pointer_loss": 0.9406097531318665, "step": 4300 }, { "epoch": 0.439510012619342, "grad_norm": 4.301856793498769, "learning_rate": 3.114447592067989e-06, "loss": 0.2723, "step": 4310 }, { "epoch": 0.439510012619342, "loss_breakdown/lm_loss": 9.658736416895408e-06, "loss_breakdown/pointer_loss": 0.24331578612327576, "step": 4310 }, { "epoch": 0.439510012619342, "loss_breakdown/lm_loss": 8.946509296947625e-06, "loss_breakdown/pointer_loss": 0.21012216806411743, "step": 4310 }, { "epoch": 0.439510012619342, "loss_breakdown/lm_loss": 6.06569801675505e-06, "loss_breakdown/pointer_loss": 0.07620267570018768, "step": 4310 }, { "epoch": 0.439510012619342, "loss_breakdown/lm_loss": 8.055790203798097e-06, "loss_breakdown/pointer_loss": 0.11653297394514084, "step": 4310 }, { "epoch": 0.439510012619342, "loss_breakdown/lm_loss": 1.1655076377792284e-05, "loss_breakdown/pointer_loss": 0.4644883871078491, "step": 4310 }, { "epoch": 0.439510012619342, "loss_breakdown/lm_loss": 8.781620636000298e-06, "loss_breakdown/pointer_loss": 2.276393413543701, "step": 4310 }, { "epoch": 0.439510012619342, "loss_breakdown/lm_loss": 8.280962902063038e-06, "loss_breakdown/pointer_loss": 0.43135157227516174, "step": 4310 }, { "epoch": 0.439510012619342, "loss_breakdown/lm_loss": 1.352163508272497e-05, "loss_breakdown/pointer_loss": 1.84990656375885, "step": 4310 }, { "epoch": 0.4405297574282036, "grad_norm": 5.265958676825988, "learning_rate": 3.1087818696883853e-06, "loss": 0.3092, "step": 4320 }, { "epoch": 0.4405297574282036, "loss_breakdown/lm_loss": 9.843971383816097e-06, "loss_breakdown/pointer_loss": 1.1436160802841187, "step": 4320 }, { "epoch": 0.4405297574282036, "loss_breakdown/lm_loss": 9.588366992829833e-06, "loss_breakdown/pointer_loss": 0.7294876575469971, "step": 4320 }, { "epoch": 0.4405297574282036, "loss_breakdown/lm_loss": 7.574789378850255e-06, "loss_breakdown/pointer_loss": 0.3386842906475067, "step": 4320 }, { "epoch": 0.4405297574282036, "loss_breakdown/lm_loss": 7.049151918181451e-06, "loss_breakdown/pointer_loss": 0.24045050144195557, "step": 4320 }, { "epoch": 0.4405297574282036, "loss_breakdown/lm_loss": 9.780380423762836e-06, "loss_breakdown/pointer_loss": 0.25559407472610474, "step": 4320 }, { "epoch": 0.4405297574282036, "loss_breakdown/lm_loss": 9.13581061467994e-06, "loss_breakdown/pointer_loss": 0.2927476763725281, "step": 4320 }, { "epoch": 0.4405297574282036, "loss_breakdown/lm_loss": 1.1787356925196946e-05, "loss_breakdown/pointer_loss": 1.4755045175552368, "step": 4320 }, { "epoch": 0.4405297574282036, "loss_breakdown/lm_loss": 1.1119688679173123e-05, "loss_breakdown/pointer_loss": 0.9376461505889893, "step": 4320 }, { "epoch": 0.4415495022370652, "grad_norm": 4.952898599751375, "learning_rate": 3.1031161473087823e-06, "loss": 0.2542, "step": 4330 }, { "epoch": 0.4415495022370652, "loss_breakdown/lm_loss": 6.632848817389458e-05, "loss_breakdown/pointer_loss": 0.4552841782569885, "step": 4330 }, { "epoch": 0.4415495022370652, "loss_breakdown/lm_loss": 1.9734989109565504e-05, "loss_breakdown/pointer_loss": 0.050002321600914, "step": 4330 }, { "epoch": 0.4415495022370652, "loss_breakdown/lm_loss": 1.2536527719930746e-05, "loss_breakdown/pointer_loss": 0.21805141866207123, "step": 4330 }, { "epoch": 0.4415495022370652, "loss_breakdown/lm_loss": 1.5266117770806886e-05, "loss_breakdown/pointer_loss": 0.34756335616111755, "step": 4330 }, { "epoch": 0.4415495022370652, "loss_breakdown/lm_loss": 1.085973417502828e-05, "loss_breakdown/pointer_loss": 0.17609953880310059, "step": 4330 }, { "epoch": 0.4415495022370652, "loss_breakdown/lm_loss": 1.3235846381576266e-05, "loss_breakdown/pointer_loss": 0.3090181350708008, "step": 4330 }, { "epoch": 0.4415495022370652, "loss_breakdown/lm_loss": 1.3330846741155256e-05, "loss_breakdown/pointer_loss": 0.19152840971946716, "step": 4330 }, { "epoch": 0.4415495022370652, "loss_breakdown/lm_loss": 1.1861027815029956e-05, "loss_breakdown/pointer_loss": 0.9248746633529663, "step": 4330 }, { "epoch": 0.44256924704592676, "grad_norm": 10.479557566429158, "learning_rate": 3.0974504249291788e-06, "loss": 0.2826, "step": 4340 }, { "epoch": 0.44256924704592676, "loss_breakdown/lm_loss": 9.779574611457065e-05, "loss_breakdown/pointer_loss": 2.9109482765197754, "step": 4340 }, { "epoch": 0.44256924704592676, "loss_breakdown/lm_loss": 7.962392373883631e-06, "loss_breakdown/pointer_loss": 0.15342846512794495, "step": 4340 }, { "epoch": 0.44256924704592676, "loss_breakdown/lm_loss": 8.18513217382133e-06, "loss_breakdown/pointer_loss": 0.11972533166408539, "step": 4340 }, { "epoch": 0.44256924704592676, "loss_breakdown/lm_loss": 7.97023585619172e-06, "loss_breakdown/pointer_loss": 0.39414599537849426, "step": 4340 }, { "epoch": 0.44256924704592676, "loss_breakdown/lm_loss": 1.0437161108711734e-05, "loss_breakdown/pointer_loss": 0.2898077368736267, "step": 4340 }, { "epoch": 0.44256924704592676, "loss_breakdown/lm_loss": 1.1645232007140294e-05, "loss_breakdown/pointer_loss": 0.17558199167251587, "step": 4340 }, { "epoch": 0.44256924704592676, "loss_breakdown/lm_loss": 8.609758879174478e-06, "loss_breakdown/pointer_loss": 0.1674731969833374, "step": 4340 }, { "epoch": 0.44256924704592676, "loss_breakdown/lm_loss": 8.270018042821903e-06, "loss_breakdown/pointer_loss": 0.18885508179664612, "step": 4340 }, { "epoch": 0.44358899185478834, "grad_norm": 8.82837751991416, "learning_rate": 3.0917847025495753e-06, "loss": 0.2701, "step": 4350 }, { "epoch": 0.44358899185478834, "loss_breakdown/lm_loss": 3.044086588488426e-05, "loss_breakdown/pointer_loss": 2.2023141384124756, "step": 4350 }, { "epoch": 0.44358899185478834, "loss_breakdown/lm_loss": 1.7109936379711144e-05, "loss_breakdown/pointer_loss": 0.6915504932403564, "step": 4350 }, { "epoch": 0.44358899185478834, "loss_breakdown/lm_loss": 1.2500409866333939e-05, "loss_breakdown/pointer_loss": 0.32508689165115356, "step": 4350 }, { "epoch": 0.44358899185478834, "loss_breakdown/lm_loss": 1.2829184925067239e-05, "loss_breakdown/pointer_loss": 0.6113986968994141, "step": 4350 }, { "epoch": 0.44358899185478834, "loss_breakdown/lm_loss": 1.0591870704956818e-05, "loss_breakdown/pointer_loss": 0.8415192365646362, "step": 4350 }, { "epoch": 0.44358899185478834, "loss_breakdown/lm_loss": 1.2177452845207881e-05, "loss_breakdown/pointer_loss": 0.43812742829322815, "step": 4350 }, { "epoch": 0.44358899185478834, "loss_breakdown/lm_loss": 1.0141888196812943e-05, "loss_breakdown/pointer_loss": 1.0052136182785034, "step": 4350 }, { "epoch": 0.44358899185478834, "loss_breakdown/lm_loss": 1.2012322258669883e-05, "loss_breakdown/pointer_loss": 0.7219968438148499, "step": 4350 }, { "epoch": 0.4446087366636499, "grad_norm": 4.831317865494569, "learning_rate": 3.086118980169972e-06, "loss": 0.2738, "step": 4360 }, { "epoch": 0.4446087366636499, "loss_breakdown/lm_loss": 8.76343074196484e-06, "loss_breakdown/pointer_loss": 0.26744207739830017, "step": 4360 }, { "epoch": 0.4446087366636499, "loss_breakdown/lm_loss": 7.114719664969016e-06, "loss_breakdown/pointer_loss": 0.3111506700515747, "step": 4360 }, { "epoch": 0.4446087366636499, "loss_breakdown/lm_loss": 7.5855823524761945e-06, "loss_breakdown/pointer_loss": 1.3357294797897339, "step": 4360 }, { "epoch": 0.4446087366636499, "loss_breakdown/lm_loss": 9.890211003948934e-06, "loss_breakdown/pointer_loss": 0.1449078917503357, "step": 4360 }, { "epoch": 0.4446087366636499, "loss_breakdown/lm_loss": 8.086249181360472e-06, "loss_breakdown/pointer_loss": 0.7450864911079407, "step": 4360 }, { "epoch": 0.4446087366636499, "loss_breakdown/lm_loss": 9.7193678811891e-06, "loss_breakdown/pointer_loss": 0.16729025542736053, "step": 4360 }, { "epoch": 0.4446087366636499, "loss_breakdown/lm_loss": 8.400154911214486e-06, "loss_breakdown/pointer_loss": 2.1695401668548584, "step": 4360 }, { "epoch": 0.4446087366636499, "loss_breakdown/lm_loss": 1.4614605788665358e-05, "loss_breakdown/pointer_loss": 0.685100793838501, "step": 4360 }, { "epoch": 0.4456284814725115, "grad_norm": 3.2065263923344514, "learning_rate": 3.0804532577903683e-06, "loss": 0.2857, "step": 4370 }, { "epoch": 0.4456284814725115, "loss_breakdown/lm_loss": 8.84029759617988e-06, "loss_breakdown/pointer_loss": 1.025463342666626, "step": 4370 }, { "epoch": 0.4456284814725115, "loss_breakdown/lm_loss": 8.09232733445242e-06, "loss_breakdown/pointer_loss": 0.405855655670166, "step": 4370 }, { "epoch": 0.4456284814725115, "loss_breakdown/lm_loss": 9.239808605343569e-06, "loss_breakdown/pointer_loss": 0.31115227937698364, "step": 4370 }, { "epoch": 0.4456284814725115, "loss_breakdown/lm_loss": 8.244096534326673e-06, "loss_breakdown/pointer_loss": 0.3474521338939667, "step": 4370 }, { "epoch": 0.4456284814725115, "loss_breakdown/lm_loss": 8.058800631260965e-06, "loss_breakdown/pointer_loss": 0.4569212794303894, "step": 4370 }, { "epoch": 0.4456284814725115, "loss_breakdown/lm_loss": 8.842909664963372e-06, "loss_breakdown/pointer_loss": 0.3014770746231079, "step": 4370 }, { "epoch": 0.4456284814725115, "loss_breakdown/lm_loss": 6.4434138948854525e-06, "loss_breakdown/pointer_loss": 0.21731440722942352, "step": 4370 }, { "epoch": 0.4456284814725115, "loss_breakdown/lm_loss": 8.29929285828257e-06, "loss_breakdown/pointer_loss": 0.4504443407058716, "step": 4370 }, { "epoch": 0.4466482262813731, "grad_norm": 6.77500643794072, "learning_rate": 3.074787535410765e-06, "loss": 0.2561, "step": 4380 }, { "epoch": 0.4466482262813731, "loss_breakdown/lm_loss": 2.0283054254832678e-05, "loss_breakdown/pointer_loss": 0.9487931728363037, "step": 4380 }, { "epoch": 0.4466482262813731, "loss_breakdown/lm_loss": 1.0053116966446396e-05, "loss_breakdown/pointer_loss": 6.0982537269592285, "step": 4380 }, { "epoch": 0.4466482262813731, "loss_breakdown/lm_loss": 2.5417710276087746e-05, "loss_breakdown/pointer_loss": 0.17699044942855835, "step": 4380 }, { "epoch": 0.4466482262813731, "loss_breakdown/lm_loss": 1.19364085549023e-05, "loss_breakdown/pointer_loss": 0.4083273410797119, "step": 4380 }, { "epoch": 0.4466482262813731, "loss_breakdown/lm_loss": 9.878257515083533e-06, "loss_breakdown/pointer_loss": 0.10985904932022095, "step": 4380 }, { "epoch": 0.4466482262813731, "loss_breakdown/lm_loss": 6.9935240389895625e-06, "loss_breakdown/pointer_loss": 2.348724365234375, "step": 4380 }, { "epoch": 0.4466482262813731, "loss_breakdown/lm_loss": 1.342600899079116e-05, "loss_breakdown/pointer_loss": 0.06876913458108902, "step": 4380 }, { "epoch": 0.4466482262813731, "loss_breakdown/lm_loss": 8.567062650399748e-06, "loss_breakdown/pointer_loss": 0.09041513502597809, "step": 4380 }, { "epoch": 0.44766797109023465, "grad_norm": 3.809611130750529, "learning_rate": 3.0691218130311617e-06, "loss": 0.3015, "step": 4390 }, { "epoch": 0.44766797109023465, "loss_breakdown/lm_loss": 7.790173185640015e-06, "loss_breakdown/pointer_loss": 0.2915496230125427, "step": 4390 }, { "epoch": 0.44766797109023465, "loss_breakdown/lm_loss": 8.943774446379393e-06, "loss_breakdown/pointer_loss": 0.13354390859603882, "step": 4390 }, { "epoch": 0.44766797109023465, "loss_breakdown/lm_loss": 6.3822126321610995e-06, "loss_breakdown/pointer_loss": 0.6561529636383057, "step": 4390 }, { "epoch": 0.44766797109023465, "loss_breakdown/lm_loss": 1.112877180275973e-05, "loss_breakdown/pointer_loss": 0.09729719907045364, "step": 4390 }, { "epoch": 0.44766797109023465, "loss_breakdown/lm_loss": 7.686660865147132e-06, "loss_breakdown/pointer_loss": 0.29318568110466003, "step": 4390 }, { "epoch": 0.44766797109023465, "loss_breakdown/lm_loss": 8.6737400124548e-06, "loss_breakdown/pointer_loss": 0.3605799674987793, "step": 4390 }, { "epoch": 0.44766797109023465, "loss_breakdown/lm_loss": 8.76569356478285e-06, "loss_breakdown/pointer_loss": 0.961422324180603, "step": 4390 }, { "epoch": 0.44766797109023465, "loss_breakdown/lm_loss": 7.042534434731351e-06, "loss_breakdown/pointer_loss": 0.3464369475841522, "step": 4390 }, { "epoch": 0.44868771589909623, "grad_norm": 6.602395339140426, "learning_rate": 3.0634560906515582e-06, "loss": 0.276, "step": 4400 }, { "epoch": 0.44868771589909623, "loss_breakdown/lm_loss": 2.8677424779743887e-05, "loss_breakdown/pointer_loss": 1.3999428749084473, "step": 4400 }, { "epoch": 0.44868771589909623, "loss_breakdown/lm_loss": 2.1183888748055324e-05, "loss_breakdown/pointer_loss": 0.7363839149475098, "step": 4400 }, { "epoch": 0.44868771589909623, "loss_breakdown/lm_loss": 1.415674250893062e-05, "loss_breakdown/pointer_loss": 0.7350281476974487, "step": 4400 }, { "epoch": 0.44868771589909623, "loss_breakdown/lm_loss": 4.291287041269243e-05, "loss_breakdown/pointer_loss": 0.81218022108078, "step": 4400 }, { "epoch": 0.44868771589909623, "loss_breakdown/lm_loss": 1.6994057659758255e-05, "loss_breakdown/pointer_loss": 0.7408754229545593, "step": 4400 }, { "epoch": 0.44868771589909623, "loss_breakdown/lm_loss": 2.9613185688504018e-05, "loss_breakdown/pointer_loss": 0.4251724183559418, "step": 4400 }, { "epoch": 0.44868771589909623, "loss_breakdown/lm_loss": 8.325217095261905e-06, "loss_breakdown/pointer_loss": 0.7631539106369019, "step": 4400 }, { "epoch": 0.44868771589909623, "loss_breakdown/lm_loss": 1.3548033166443929e-05, "loss_breakdown/pointer_loss": 0.4861339330673218, "step": 4400 }, { "epoch": 0.4497074607079578, "grad_norm": 4.5934493583061, "learning_rate": 3.057790368271955e-06, "loss": 0.2709, "step": 4410 }, { "epoch": 0.4497074607079578, "loss_breakdown/lm_loss": 7.5299526542949025e-06, "loss_breakdown/pointer_loss": 0.2038622349500656, "step": 4410 }, { "epoch": 0.4497074607079578, "loss_breakdown/lm_loss": 1.4757307326362934e-05, "loss_breakdown/pointer_loss": 0.22797784209251404, "step": 4410 }, { "epoch": 0.4497074607079578, "loss_breakdown/lm_loss": 9.331496585218702e-06, "loss_breakdown/pointer_loss": 0.12733779847621918, "step": 4410 }, { "epoch": 0.4497074607079578, "loss_breakdown/lm_loss": 9.221840628015343e-06, "loss_breakdown/pointer_loss": 1.0055336952209473, "step": 4410 }, { "epoch": 0.4497074607079578, "loss_breakdown/lm_loss": 8.984226042230148e-06, "loss_breakdown/pointer_loss": 2.5998058319091797, "step": 4410 }, { "epoch": 0.4497074607079578, "loss_breakdown/lm_loss": 1.9092531147180125e-05, "loss_breakdown/pointer_loss": 0.6012357473373413, "step": 4410 }, { "epoch": 0.4497074607079578, "loss_breakdown/lm_loss": 1.9441522454144433e-05, "loss_breakdown/pointer_loss": 0.38453173637390137, "step": 4410 }, { "epoch": 0.4497074607079578, "loss_breakdown/lm_loss": 8.92068328539608e-06, "loss_breakdown/pointer_loss": 0.18831677734851837, "step": 4410 }, { "epoch": 0.45072720551681944, "grad_norm": 3.3331769843268164, "learning_rate": 3.0521246458923516e-06, "loss": 0.2893, "step": 4420 }, { "epoch": 0.45072720551681944, "loss_breakdown/lm_loss": 5.0119662773795426e-05, "loss_breakdown/pointer_loss": 0.9835413694381714, "step": 4420 }, { "epoch": 0.45072720551681944, "loss_breakdown/lm_loss": 9.405584933119826e-06, "loss_breakdown/pointer_loss": 0.4970221519470215, "step": 4420 }, { "epoch": 0.45072720551681944, "loss_breakdown/lm_loss": 1.4463475963566452e-05, "loss_breakdown/pointer_loss": 0.3219949007034302, "step": 4420 }, { "epoch": 0.45072720551681944, "loss_breakdown/lm_loss": 8.543191142962314e-06, "loss_breakdown/pointer_loss": 0.17117813229560852, "step": 4420 }, { "epoch": 0.45072720551681944, "loss_breakdown/lm_loss": 1.0762119018181693e-05, "loss_breakdown/pointer_loss": 0.44717520475387573, "step": 4420 }, { "epoch": 0.45072720551681944, "loss_breakdown/lm_loss": 8.63644072524039e-06, "loss_breakdown/pointer_loss": 0.2913718819618225, "step": 4420 }, { "epoch": 0.45072720551681944, "loss_breakdown/lm_loss": 9.421542927157134e-06, "loss_breakdown/pointer_loss": 0.2025018334388733, "step": 4420 }, { "epoch": 0.45072720551681944, "loss_breakdown/lm_loss": 1.05949902717839e-05, "loss_breakdown/pointer_loss": 0.20878304541110992, "step": 4420 }, { "epoch": 0.451746950325681, "grad_norm": 8.160907502424953, "learning_rate": 3.0464589235127477e-06, "loss": 0.2692, "step": 4430 }, { "epoch": 0.451746950325681, "loss_breakdown/lm_loss": 8.869000339473132e-06, "loss_breakdown/pointer_loss": 0.4448868930339813, "step": 4430 }, { "epoch": 0.451746950325681, "loss_breakdown/lm_loss": 1.0851746083062608e-05, "loss_breakdown/pointer_loss": 3.5133986473083496, "step": 4430 }, { "epoch": 0.451746950325681, "loss_breakdown/lm_loss": 1.1408016689529177e-05, "loss_breakdown/pointer_loss": 0.08177919685840607, "step": 4430 }, { "epoch": 0.451746950325681, "loss_breakdown/lm_loss": 5.295481241773814e-05, "loss_breakdown/pointer_loss": 0.33870741724967957, "step": 4430 }, { "epoch": 0.451746950325681, "loss_breakdown/lm_loss": 7.46239038562635e-06, "loss_breakdown/pointer_loss": 0.23876342177391052, "step": 4430 }, { "epoch": 0.451746950325681, "loss_breakdown/lm_loss": 9.083577424462419e-06, "loss_breakdown/pointer_loss": 0.20223477482795715, "step": 4430 }, { "epoch": 0.451746950325681, "loss_breakdown/lm_loss": 1.797486220311839e-05, "loss_breakdown/pointer_loss": 0.2236521989107132, "step": 4430 }, { "epoch": 0.451746950325681, "loss_breakdown/lm_loss": 6.874312930449378e-06, "loss_breakdown/pointer_loss": 0.041793227195739746, "step": 4430 }, { "epoch": 0.4527666951345426, "grad_norm": 4.865709010987977, "learning_rate": 3.0407932011331447e-06, "loss": 0.2838, "step": 4440 }, { "epoch": 0.4527666951345426, "loss_breakdown/lm_loss": 7.487112725357292e-06, "loss_breakdown/pointer_loss": 0.1417073905467987, "step": 4440 }, { "epoch": 0.4527666951345426, "loss_breakdown/lm_loss": 6.208086233527865e-06, "loss_breakdown/pointer_loss": 0.18244574964046478, "step": 4440 }, { "epoch": 0.4527666951345426, "loss_breakdown/lm_loss": 9.69041957432637e-06, "loss_breakdown/pointer_loss": 0.1419031023979187, "step": 4440 }, { "epoch": 0.4527666951345426, "loss_breakdown/lm_loss": 1.1917148185602855e-05, "loss_breakdown/pointer_loss": 0.20234449207782745, "step": 4440 }, { "epoch": 0.4527666951345426, "loss_breakdown/lm_loss": 7.398809884762159e-06, "loss_breakdown/pointer_loss": 0.564530611038208, "step": 4440 }, { "epoch": 0.4527666951345426, "loss_breakdown/lm_loss": 8.05479703558376e-06, "loss_breakdown/pointer_loss": 0.13193880021572113, "step": 4440 }, { "epoch": 0.4527666951345426, "loss_breakdown/lm_loss": 9.2949712779955e-06, "loss_breakdown/pointer_loss": 1.8554835319519043, "step": 4440 }, { "epoch": 0.4527666951345426, "loss_breakdown/lm_loss": 8.572344086132944e-06, "loss_breakdown/pointer_loss": 0.9626997113227844, "step": 4440 }, { "epoch": 0.4537864399434042, "grad_norm": 79.07506332001537, "learning_rate": 3.035127478753541e-06, "loss": 0.2897, "step": 4450 }, { "epoch": 0.4537864399434042, "loss_breakdown/lm_loss": 3.141393244732171e-05, "loss_breakdown/pointer_loss": 1.65195631980896, "step": 4450 }, { "epoch": 0.4537864399434042, "loss_breakdown/lm_loss": 2.3325119400396943e-05, "loss_breakdown/pointer_loss": 1.190678596496582, "step": 4450 }, { "epoch": 0.4537864399434042, "loss_breakdown/lm_loss": 1.505382897448726e-05, "loss_breakdown/pointer_loss": 0.6388934254646301, "step": 4450 }, { "epoch": 0.4537864399434042, "loss_breakdown/lm_loss": 1.4568157894245815e-05, "loss_breakdown/pointer_loss": 0.797338604927063, "step": 4450 }, { "epoch": 0.4537864399434042, "loss_breakdown/lm_loss": 1.522291040600976e-05, "loss_breakdown/pointer_loss": 0.4590502679347992, "step": 4450 }, { "epoch": 0.4537864399434042, "loss_breakdown/lm_loss": 1.2767266525770538e-05, "loss_breakdown/pointer_loss": 0.5143004059791565, "step": 4450 }, { "epoch": 0.4537864399434042, "loss_breakdown/lm_loss": 2.159810537705198e-05, "loss_breakdown/pointer_loss": 1.329537034034729, "step": 4450 }, { "epoch": 0.4537864399434042, "loss_breakdown/lm_loss": 1.4654074220743496e-05, "loss_breakdown/pointer_loss": 0.7807815074920654, "step": 4450 }, { "epoch": 0.45480618475226575, "grad_norm": 6.7579664450651755, "learning_rate": 3.029461756373938e-06, "loss": 0.2844, "step": 4460 }, { "epoch": 0.45480618475226575, "loss_breakdown/lm_loss": 1.0801630196510814e-05, "loss_breakdown/pointer_loss": 0.10452671349048615, "step": 4460 }, { "epoch": 0.45480618475226575, "loss_breakdown/lm_loss": 1.1310223271721043e-05, "loss_breakdown/pointer_loss": 0.10811196267604828, "step": 4460 }, { "epoch": 0.45480618475226575, "loss_breakdown/lm_loss": 1.4153634765534662e-05, "loss_breakdown/pointer_loss": 0.728734016418457, "step": 4460 }, { "epoch": 0.45480618475226575, "loss_breakdown/lm_loss": 1.19762680697022e-05, "loss_breakdown/pointer_loss": 0.03332112729549408, "step": 4460 }, { "epoch": 0.45480618475226575, "loss_breakdown/lm_loss": 9.751110155775677e-06, "loss_breakdown/pointer_loss": 0.1726965457201004, "step": 4460 }, { "epoch": 0.45480618475226575, "loss_breakdown/lm_loss": 1.7794651284930296e-05, "loss_breakdown/pointer_loss": 0.0770421028137207, "step": 4460 }, { "epoch": 0.45480618475226575, "loss_breakdown/lm_loss": 1.1046484360122122e-05, "loss_breakdown/pointer_loss": 0.39692360162734985, "step": 4460 }, { "epoch": 0.45480618475226575, "loss_breakdown/lm_loss": 1.0184253369516227e-05, "loss_breakdown/pointer_loss": 0.22192732989788055, "step": 4460 }, { "epoch": 0.45582592956112733, "grad_norm": 2.436162412540197, "learning_rate": 3.0237960339943346e-06, "loss": 0.2816, "step": 4470 }, { "epoch": 0.45582592956112733, "loss_breakdown/lm_loss": 2.8058184398105368e-05, "loss_breakdown/pointer_loss": 0.5601863265037537, "step": 4470 }, { "epoch": 0.45582592956112733, "loss_breakdown/lm_loss": 1.0559774636931252e-05, "loss_breakdown/pointer_loss": 0.33639901876449585, "step": 4470 }, { "epoch": 0.45582592956112733, "loss_breakdown/lm_loss": 1.2708906069747172e-05, "loss_breakdown/pointer_loss": 0.9493968486785889, "step": 4470 }, { "epoch": 0.45582592956112733, "loss_breakdown/lm_loss": 1.0284804375260137e-05, "loss_breakdown/pointer_loss": 0.8252065181732178, "step": 4470 }, { "epoch": 0.45582592956112733, "loss_breakdown/lm_loss": 6.657097856077598e-06, "loss_breakdown/pointer_loss": 0.8323284387588501, "step": 4470 }, { "epoch": 0.45582592956112733, "loss_breakdown/lm_loss": 8.393273674300872e-06, "loss_breakdown/pointer_loss": 0.2325952649116516, "step": 4470 }, { "epoch": 0.45582592956112733, "loss_breakdown/lm_loss": 1.535798037366476e-05, "loss_breakdown/pointer_loss": 0.8933249711990356, "step": 4470 }, { "epoch": 0.45582592956112733, "loss_breakdown/lm_loss": 9.318807315139566e-06, "loss_breakdown/pointer_loss": 0.1778157651424408, "step": 4470 }, { "epoch": 0.4568456743699889, "grad_norm": 10.960555297096091, "learning_rate": 3.018130311614731e-06, "loss": 0.274, "step": 4480 }, { "epoch": 0.4568456743699889, "loss_breakdown/lm_loss": 9.576250704412814e-06, "loss_breakdown/pointer_loss": 0.16920065879821777, "step": 4480 }, { "epoch": 0.4568456743699889, "loss_breakdown/lm_loss": 7.231938980112318e-06, "loss_breakdown/pointer_loss": 0.11470268666744232, "step": 4480 }, { "epoch": 0.4568456743699889, "loss_breakdown/lm_loss": 8.495509064232465e-06, "loss_breakdown/pointer_loss": 0.1476607620716095, "step": 4480 }, { "epoch": 0.4568456743699889, "loss_breakdown/lm_loss": 1.116959629143821e-05, "loss_breakdown/pointer_loss": 0.3622586727142334, "step": 4480 }, { "epoch": 0.4568456743699889, "loss_breakdown/lm_loss": 1.3116254194756038e-05, "loss_breakdown/pointer_loss": 1.896610975265503, "step": 4480 }, { "epoch": 0.4568456743699889, "loss_breakdown/lm_loss": 9.365708137920592e-06, "loss_breakdown/pointer_loss": 0.07192564010620117, "step": 4480 }, { "epoch": 0.4568456743699889, "loss_breakdown/lm_loss": 8.70610438141739e-06, "loss_breakdown/pointer_loss": 2.670307159423828, "step": 4480 }, { "epoch": 0.4568456743699889, "loss_breakdown/lm_loss": 1.560369310027454e-05, "loss_breakdown/pointer_loss": 2.5607590675354004, "step": 4480 }, { "epoch": 0.4578654191788505, "grad_norm": 4.239715355232928, "learning_rate": 3.012464589235128e-06, "loss": 0.2976, "step": 4490 }, { "epoch": 0.4578654191788505, "loss_breakdown/lm_loss": 7.022663794487016e-06, "loss_breakdown/pointer_loss": 0.3849181532859802, "step": 4490 }, { "epoch": 0.4578654191788505, "loss_breakdown/lm_loss": 1.0395983736088965e-05, "loss_breakdown/pointer_loss": 0.24396920204162598, "step": 4490 }, { "epoch": 0.4578654191788505, "loss_breakdown/lm_loss": 1.0663584362191614e-05, "loss_breakdown/pointer_loss": 0.25659751892089844, "step": 4490 }, { "epoch": 0.4578654191788505, "loss_breakdown/lm_loss": 9.45529063756112e-06, "loss_breakdown/pointer_loss": 0.474385142326355, "step": 4490 }, { "epoch": 0.4578654191788505, "loss_breakdown/lm_loss": 6.500796644104412e-06, "loss_breakdown/pointer_loss": 0.3210858702659607, "step": 4490 }, { "epoch": 0.4578654191788505, "loss_breakdown/lm_loss": 1.1906129657290876e-05, "loss_breakdown/pointer_loss": 0.6284339427947998, "step": 4490 }, { "epoch": 0.4578654191788505, "loss_breakdown/lm_loss": 7.912361979833804e-06, "loss_breakdown/pointer_loss": 0.23442503809928894, "step": 4490 }, { "epoch": 0.4578654191788505, "loss_breakdown/lm_loss": 8.971546776592731e-06, "loss_breakdown/pointer_loss": 0.1400069296360016, "step": 4490 }, { "epoch": 0.45888516398771206, "grad_norm": 12.36196763967929, "learning_rate": 3.006798866855524e-06, "loss": 0.3015, "step": 4500 }, { "epoch": 0.45888516398771206, "loss_breakdown/lm_loss": 2.668360866664443e-05, "loss_breakdown/pointer_loss": 1.248779535293579, "step": 4500 }, { "epoch": 0.45888516398771206, "loss_breakdown/lm_loss": 1.2314558262005448e-05, "loss_breakdown/pointer_loss": 0.5257911682128906, "step": 4500 }, { "epoch": 0.45888516398771206, "loss_breakdown/lm_loss": 1.1477940461190883e-05, "loss_breakdown/pointer_loss": 0.48735716938972473, "step": 4500 }, { "epoch": 0.45888516398771206, "loss_breakdown/lm_loss": 1.494748630648246e-05, "loss_breakdown/pointer_loss": 0.4772607088088989, "step": 4500 }, { "epoch": 0.45888516398771206, "loss_breakdown/lm_loss": 1.1124884622404352e-05, "loss_breakdown/pointer_loss": 0.3227165937423706, "step": 4500 }, { "epoch": 0.45888516398771206, "loss_breakdown/lm_loss": 1.1541294952621683e-05, "loss_breakdown/pointer_loss": 0.787216305732727, "step": 4500 }, { "epoch": 0.45888516398771206, "loss_breakdown/lm_loss": 8.477047231281176e-06, "loss_breakdown/pointer_loss": 0.5866204500198364, "step": 4500 }, { "epoch": 0.45888516398771206, "loss_breakdown/lm_loss": 1.101488305721432e-05, "loss_breakdown/pointer_loss": 0.4314940571784973, "step": 4500 }, { "epoch": 0.45990490879657364, "grad_norm": 4.001153411782988, "learning_rate": 3.001133144475921e-06, "loss": 0.2772, "step": 4510 }, { "epoch": 0.45990490879657364, "loss_breakdown/lm_loss": 6.1411965361912735e-06, "loss_breakdown/pointer_loss": 0.3389017581939697, "step": 4510 }, { "epoch": 0.45990490879657364, "loss_breakdown/lm_loss": 1.030849580274662e-05, "loss_breakdown/pointer_loss": 1.2303593158721924, "step": 4510 }, { "epoch": 0.45990490879657364, "loss_breakdown/lm_loss": 8.11405243439367e-06, "loss_breakdown/pointer_loss": 0.2772713005542755, "step": 4510 }, { "epoch": 0.45990490879657364, "loss_breakdown/lm_loss": 7.724650458840188e-06, "loss_breakdown/pointer_loss": 0.17244334518909454, "step": 4510 }, { "epoch": 0.45990490879657364, "loss_breakdown/lm_loss": 8.22134916234063e-06, "loss_breakdown/pointer_loss": 0.1552356779575348, "step": 4510 }, { "epoch": 0.45990490879657364, "loss_breakdown/lm_loss": 7.769656804157421e-06, "loss_breakdown/pointer_loss": 0.1659848988056183, "step": 4510 }, { "epoch": 0.45990490879657364, "loss_breakdown/lm_loss": 1.0339236723666545e-05, "loss_breakdown/pointer_loss": 0.4227514863014221, "step": 4510 }, { "epoch": 0.45990490879657364, "loss_breakdown/lm_loss": 1.5158891073951963e-05, "loss_breakdown/pointer_loss": 0.22856450080871582, "step": 4510 }, { "epoch": 0.4609246536054352, "grad_norm": 9.046945420733055, "learning_rate": 2.9954674220963175e-06, "loss": 0.2933, "step": 4520 }, { "epoch": 0.4609246536054352, "loss_breakdown/lm_loss": 1.2033039638481569e-05, "loss_breakdown/pointer_loss": 1.187973976135254, "step": 4520 }, { "epoch": 0.4609246536054352, "loss_breakdown/lm_loss": 1.1065748367400374e-05, "loss_breakdown/pointer_loss": 0.4202858805656433, "step": 4520 }, { "epoch": 0.4609246536054352, "loss_breakdown/lm_loss": 7.702528819208965e-06, "loss_breakdown/pointer_loss": 0.4966908097267151, "step": 4520 }, { "epoch": 0.4609246536054352, "loss_breakdown/lm_loss": 8.157762749760877e-06, "loss_breakdown/pointer_loss": 1.4819833040237427, "step": 4520 }, { "epoch": 0.4609246536054352, "loss_breakdown/lm_loss": 9.72161433310248e-06, "loss_breakdown/pointer_loss": 0.6181466579437256, "step": 4520 }, { "epoch": 0.4609246536054352, "loss_breakdown/lm_loss": 6.897097591718193e-06, "loss_breakdown/pointer_loss": 0.27477723360061646, "step": 4520 }, { "epoch": 0.4609246536054352, "loss_breakdown/lm_loss": 7.329932941502193e-06, "loss_breakdown/pointer_loss": 0.2872823476791382, "step": 4520 }, { "epoch": 0.4609246536054352, "loss_breakdown/lm_loss": 1.1218211511732079e-05, "loss_breakdown/pointer_loss": 0.477561891078949, "step": 4520 }, { "epoch": 0.4619443984142968, "grad_norm": 28.44981302020925, "learning_rate": 2.989801699716714e-06, "loss": 0.2683, "step": 4530 }, { "epoch": 0.4619443984142968, "loss_breakdown/lm_loss": 8.46371676743729e-06, "loss_breakdown/pointer_loss": 0.1667795479297638, "step": 4530 }, { "epoch": 0.4619443984142968, "loss_breakdown/lm_loss": 9.190826858684886e-06, "loss_breakdown/pointer_loss": 0.04846208170056343, "step": 4530 }, { "epoch": 0.4619443984142968, "loss_breakdown/lm_loss": 1.292193428525934e-05, "loss_breakdown/pointer_loss": 0.5585527420043945, "step": 4530 }, { "epoch": 0.4619443984142968, "loss_breakdown/lm_loss": 7.712706064921804e-06, "loss_breakdown/pointer_loss": 0.3162881135940552, "step": 4530 }, { "epoch": 0.4619443984142968, "loss_breakdown/lm_loss": 1.0700750863179564e-05, "loss_breakdown/pointer_loss": 0.21038532257080078, "step": 4530 }, { "epoch": 0.4619443984142968, "loss_breakdown/lm_loss": 7.613374236825621e-06, "loss_breakdown/pointer_loss": 0.09971846640110016, "step": 4530 }, { "epoch": 0.4619443984142968, "loss_breakdown/lm_loss": 1.1825159162981436e-05, "loss_breakdown/pointer_loss": 0.12111042439937592, "step": 4530 }, { "epoch": 0.4619443984142968, "loss_breakdown/lm_loss": 1.1066325896536e-05, "loss_breakdown/pointer_loss": 0.36279600858688354, "step": 4530 }, { "epoch": 0.46296414322315843, "grad_norm": 4.524869671285019, "learning_rate": 2.984135977337111e-06, "loss": 0.2929, "step": 4540 }, { "epoch": 0.46296414322315843, "loss_breakdown/lm_loss": 7.180285592767177e-06, "loss_breakdown/pointer_loss": 0.6713969707489014, "step": 4540 }, { "epoch": 0.46296414322315843, "loss_breakdown/lm_loss": 7.015077699179528e-06, "loss_breakdown/pointer_loss": 0.8020310997962952, "step": 4540 }, { "epoch": 0.46296414322315843, "loss_breakdown/lm_loss": 1.0130155715160072e-05, "loss_breakdown/pointer_loss": 0.14103710651397705, "step": 4540 }, { "epoch": 0.46296414322315843, "loss_breakdown/lm_loss": 7.582273610751145e-06, "loss_breakdown/pointer_loss": 0.5992553234100342, "step": 4540 }, { "epoch": 0.46296414322315843, "loss_breakdown/lm_loss": 5.697351753042312e-06, "loss_breakdown/pointer_loss": 0.3454597592353821, "step": 4540 }, { "epoch": 0.46296414322315843, "loss_breakdown/lm_loss": 8.093227734207176e-06, "loss_breakdown/pointer_loss": 0.2263362556695938, "step": 4540 }, { "epoch": 0.46296414322315843, "loss_breakdown/lm_loss": 6.813389518356416e-06, "loss_breakdown/pointer_loss": 0.23097245395183563, "step": 4540 }, { "epoch": 0.46296414322315843, "loss_breakdown/lm_loss": 7.3158080340363085e-06, "loss_breakdown/pointer_loss": 0.19949038326740265, "step": 4540 }, { "epoch": 0.46398388803202, "grad_norm": 7.602957170401733, "learning_rate": 2.9784702549575075e-06, "loss": 0.2647, "step": 4550 }, { "epoch": 0.46398388803202, "loss_breakdown/lm_loss": 3.3374115446349606e-05, "loss_breakdown/pointer_loss": 2.466254234313965, "step": 4550 }, { "epoch": 0.46398388803202, "loss_breakdown/lm_loss": 1.4603760064346716e-05, "loss_breakdown/pointer_loss": 0.5381419062614441, "step": 4550 }, { "epoch": 0.46398388803202, "loss_breakdown/lm_loss": 1.3330100955499802e-05, "loss_breakdown/pointer_loss": 0.9932856559753418, "step": 4550 }, { "epoch": 0.46398388803202, "loss_breakdown/lm_loss": 8.18103308120044e-06, "loss_breakdown/pointer_loss": 0.7514770030975342, "step": 4550 }, { "epoch": 0.46398388803202, "loss_breakdown/lm_loss": 8.399791113333777e-06, "loss_breakdown/pointer_loss": 0.30045998096466064, "step": 4550 }, { "epoch": 0.46398388803202, "loss_breakdown/lm_loss": 1.4586419638362713e-05, "loss_breakdown/pointer_loss": 0.977267324924469, "step": 4550 }, { "epoch": 0.46398388803202, "loss_breakdown/lm_loss": 8.917202649172395e-06, "loss_breakdown/pointer_loss": 0.8698618412017822, "step": 4550 }, { "epoch": 0.46398388803202, "loss_breakdown/lm_loss": 8.140155841829255e-06, "loss_breakdown/pointer_loss": 0.7697559595108032, "step": 4550 }, { "epoch": 0.4650036328408816, "grad_norm": 4.320124639759233, "learning_rate": 2.9728045325779035e-06, "loss": 0.2979, "step": 4560 }, { "epoch": 0.4650036328408816, "loss_breakdown/lm_loss": 7.14748148311628e-06, "loss_breakdown/pointer_loss": 0.18123623728752136, "step": 4560 }, { "epoch": 0.4650036328408816, "loss_breakdown/lm_loss": 1.065575906977756e-05, "loss_breakdown/pointer_loss": 0.14801940321922302, "step": 4560 }, { "epoch": 0.4650036328408816, "loss_breakdown/lm_loss": 8.660127605253365e-06, "loss_breakdown/pointer_loss": 0.4155208468437195, "step": 4560 }, { "epoch": 0.4650036328408816, "loss_breakdown/lm_loss": 8.204629921237938e-06, "loss_breakdown/pointer_loss": 0.23856931924819946, "step": 4560 }, { "epoch": 0.4650036328408816, "loss_breakdown/lm_loss": 9.824227163335308e-06, "loss_breakdown/pointer_loss": 0.12446320056915283, "step": 4560 }, { "epoch": 0.4650036328408816, "loss_breakdown/lm_loss": 9.453891834709793e-06, "loss_breakdown/pointer_loss": 0.5231070518493652, "step": 4560 }, { "epoch": 0.4650036328408816, "loss_breakdown/lm_loss": 1.57736776600359e-05, "loss_breakdown/pointer_loss": 2.0729517936706543, "step": 4560 }, { "epoch": 0.4650036328408816, "loss_breakdown/lm_loss": 7.732593985565472e-06, "loss_breakdown/pointer_loss": 0.3408365845680237, "step": 4560 }, { "epoch": 0.46602337764974316, "grad_norm": 2.651657400892314, "learning_rate": 2.9671388101983005e-06, "loss": 0.2785, "step": 4570 }, { "epoch": 0.46602337764974316, "loss_breakdown/lm_loss": 1.0678923899831716e-05, "loss_breakdown/pointer_loss": 0.45912402868270874, "step": 4570 }, { "epoch": 0.46602337764974316, "loss_breakdown/lm_loss": 8.891845936886966e-06, "loss_breakdown/pointer_loss": 0.6439469456672668, "step": 4570 }, { "epoch": 0.46602337764974316, "loss_breakdown/lm_loss": 8.778541086940095e-06, "loss_breakdown/pointer_loss": 0.4252784848213196, "step": 4570 }, { "epoch": 0.46602337764974316, "loss_breakdown/lm_loss": 1.193596199300373e-05, "loss_breakdown/pointer_loss": 0.32551437616348267, "step": 4570 }, { "epoch": 0.46602337764974316, "loss_breakdown/lm_loss": 9.997621418733615e-06, "loss_breakdown/pointer_loss": 0.17218756675720215, "step": 4570 }, { "epoch": 0.46602337764974316, "loss_breakdown/lm_loss": 9.242751730198506e-06, "loss_breakdown/pointer_loss": 0.2258991003036499, "step": 4570 }, { "epoch": 0.46602337764974316, "loss_breakdown/lm_loss": 7.989217010617722e-06, "loss_breakdown/pointer_loss": 0.4128659665584564, "step": 4570 }, { "epoch": 0.46602337764974316, "loss_breakdown/lm_loss": 7.122372608137084e-06, "loss_breakdown/pointer_loss": 0.3082430362701416, "step": 4570 }, { "epoch": 0.46704312245860474, "grad_norm": 7.136901953190622, "learning_rate": 2.961473087818697e-06, "loss": 0.2818, "step": 4580 }, { "epoch": 0.46704312245860474, "loss_breakdown/lm_loss": 1.5242415429383982e-05, "loss_breakdown/pointer_loss": 0.38449251651763916, "step": 4580 }, { "epoch": 0.46704312245860474, "loss_breakdown/lm_loss": 1.8146665752283297e-05, "loss_breakdown/pointer_loss": 0.10903501510620117, "step": 4580 }, { "epoch": 0.46704312245860474, "loss_breakdown/lm_loss": 1.6279185729217716e-05, "loss_breakdown/pointer_loss": 2.001614809036255, "step": 4580 }, { "epoch": 0.46704312245860474, "loss_breakdown/lm_loss": 1.5862166037550196e-05, "loss_breakdown/pointer_loss": 0.3069326877593994, "step": 4580 }, { "epoch": 0.46704312245860474, "loss_breakdown/lm_loss": 1.2131135918025393e-05, "loss_breakdown/pointer_loss": 0.1725919544696808, "step": 4580 }, { "epoch": 0.46704312245860474, "loss_breakdown/lm_loss": 2.9171227652113885e-05, "loss_breakdown/pointer_loss": 0.31523072719573975, "step": 4580 }, { "epoch": 0.46704312245860474, "loss_breakdown/lm_loss": 1.5250290744006634e-05, "loss_breakdown/pointer_loss": 1.705012321472168, "step": 4580 }, { "epoch": 0.46704312245860474, "loss_breakdown/lm_loss": 1.4451589777308982e-05, "loss_breakdown/pointer_loss": 0.5600615739822388, "step": 4580 }, { "epoch": 0.4680628672674663, "grad_norm": 10.360060469955341, "learning_rate": 2.955807365439094e-06, "loss": 0.2937, "step": 4590 }, { "epoch": 0.4680628672674663, "loss_breakdown/lm_loss": 8.106102541205473e-06, "loss_breakdown/pointer_loss": 1.2103497982025146, "step": 4590 }, { "epoch": 0.4680628672674663, "loss_breakdown/lm_loss": 8.285444891953375e-06, "loss_breakdown/pointer_loss": 0.3739638328552246, "step": 4590 }, { "epoch": 0.4680628672674663, "loss_breakdown/lm_loss": 7.653496140846983e-06, "loss_breakdown/pointer_loss": 0.3359299600124359, "step": 4590 }, { "epoch": 0.4680628672674663, "loss_breakdown/lm_loss": 1.3228504030848853e-05, "loss_breakdown/pointer_loss": 0.2812281847000122, "step": 4590 }, { "epoch": 0.4680628672674663, "loss_breakdown/lm_loss": 9.053307621798012e-06, "loss_breakdown/pointer_loss": 0.09529004245996475, "step": 4590 }, { "epoch": 0.4680628672674663, "loss_breakdown/lm_loss": 1.0418508281873073e-05, "loss_breakdown/pointer_loss": 0.16158747673034668, "step": 4590 }, { "epoch": 0.4680628672674663, "loss_breakdown/lm_loss": 9.227424925484229e-06, "loss_breakdown/pointer_loss": 0.22914913296699524, "step": 4590 }, { "epoch": 0.4680628672674663, "loss_breakdown/lm_loss": 1.0677672435122076e-05, "loss_breakdown/pointer_loss": 0.18326488137245178, "step": 4590 }, { "epoch": 0.4690826120763279, "grad_norm": 8.402824464534579, "learning_rate": 2.9501416430594904e-06, "loss": 0.2605, "step": 4600 }, { "epoch": 0.4690826120763279, "loss_breakdown/lm_loss": 7.356720743700862e-05, "loss_breakdown/pointer_loss": 2.7631356716156006, "step": 4600 }, { "epoch": 0.4690826120763279, "loss_breakdown/lm_loss": 1.1226211427128874e-05, "loss_breakdown/pointer_loss": 1.125363826751709, "step": 4600 }, { "epoch": 0.4690826120763279, "loss_breakdown/lm_loss": 1.1345984603394754e-05, "loss_breakdown/pointer_loss": 0.9220532178878784, "step": 4600 }, { "epoch": 0.4690826120763279, "loss_breakdown/lm_loss": 7.5663128882297315e-06, "loss_breakdown/pointer_loss": 1.5256162881851196, "step": 4600 }, { "epoch": 0.4690826120763279, "loss_breakdown/lm_loss": 2.3534326828666963e-05, "loss_breakdown/pointer_loss": 0.3415105640888214, "step": 4600 }, { "epoch": 0.4690826120763279, "loss_breakdown/lm_loss": 1.0835616194526665e-05, "loss_breakdown/pointer_loss": 0.5989975333213806, "step": 4600 }, { "epoch": 0.4690826120763279, "loss_breakdown/lm_loss": 1.3256617421575356e-05, "loss_breakdown/pointer_loss": 0.5727640390396118, "step": 4600 }, { "epoch": 0.4690826120763279, "loss_breakdown/lm_loss": 8.790216270426754e-06, "loss_breakdown/pointer_loss": 0.7223083972930908, "step": 4600 }, { "epoch": 0.4701023568851895, "grad_norm": 8.599174060090503, "learning_rate": 2.944475920679887e-06, "loss": 0.2762, "step": 4610 }, { "epoch": 0.4701023568851895, "loss_breakdown/lm_loss": 1.1003528015862685e-05, "loss_breakdown/pointer_loss": 0.08607892692089081, "step": 4610 }, { "epoch": 0.4701023568851895, "loss_breakdown/lm_loss": 1.0325907169317361e-05, "loss_breakdown/pointer_loss": 3.085118532180786, "step": 4610 }, { "epoch": 0.4701023568851895, "loss_breakdown/lm_loss": 9.874255738395732e-06, "loss_breakdown/pointer_loss": 0.18271571397781372, "step": 4610 }, { "epoch": 0.4701023568851895, "loss_breakdown/lm_loss": 1.0172275324293878e-05, "loss_breakdown/pointer_loss": 0.2122727930545807, "step": 4610 }, { "epoch": 0.4701023568851895, "loss_breakdown/lm_loss": 1.0514039786357898e-05, "loss_breakdown/pointer_loss": 0.5622401833534241, "step": 4610 }, { "epoch": 0.4701023568851895, "loss_breakdown/lm_loss": 1.8003443983616307e-05, "loss_breakdown/pointer_loss": 0.16417339444160461, "step": 4610 }, { "epoch": 0.4701023568851895, "loss_breakdown/lm_loss": 8.272982995549683e-06, "loss_breakdown/pointer_loss": 0.15610632300376892, "step": 4610 }, { "epoch": 0.4701023568851895, "loss_breakdown/lm_loss": 1.0784255209728144e-05, "loss_breakdown/pointer_loss": 0.45280054211616516, "step": 4610 }, { "epoch": 0.47112210169405105, "grad_norm": 2.132163801798824, "learning_rate": 2.938810198300284e-06, "loss": 0.2974, "step": 4620 }, { "epoch": 0.47112210169405105, "loss_breakdown/lm_loss": 9.623678124626167e-06, "loss_breakdown/pointer_loss": 1.3865762948989868, "step": 4620 }, { "epoch": 0.47112210169405105, "loss_breakdown/lm_loss": 8.55492635309929e-06, "loss_breakdown/pointer_loss": 0.23635593056678772, "step": 4620 }, { "epoch": 0.47112210169405105, "loss_breakdown/lm_loss": 1.016932128550252e-05, "loss_breakdown/pointer_loss": 0.2021789848804474, "step": 4620 }, { "epoch": 0.47112210169405105, "loss_breakdown/lm_loss": 1.013554083328927e-05, "loss_breakdown/pointer_loss": 0.3656967282295227, "step": 4620 }, { "epoch": 0.47112210169405105, "loss_breakdown/lm_loss": 9.075576599570923e-06, "loss_breakdown/pointer_loss": 0.9260642528533936, "step": 4620 }, { "epoch": 0.47112210169405105, "loss_breakdown/lm_loss": 6.4288101384590846e-06, "loss_breakdown/pointer_loss": 0.3793395161628723, "step": 4620 }, { "epoch": 0.47112210169405105, "loss_breakdown/lm_loss": 1.7976712115341797e-05, "loss_breakdown/pointer_loss": 0.31030339002609253, "step": 4620 }, { "epoch": 0.47112210169405105, "loss_breakdown/lm_loss": 7.72993007558398e-06, "loss_breakdown/pointer_loss": 0.346203088760376, "step": 4620 }, { "epoch": 0.47214184650291263, "grad_norm": 10.562225172935404, "learning_rate": 2.93314447592068e-06, "loss": 0.2515, "step": 4630 }, { "epoch": 0.47214184650291263, "loss_breakdown/lm_loss": 1.1721944247256033e-05, "loss_breakdown/pointer_loss": 0.675678014755249, "step": 4630 }, { "epoch": 0.47214184650291263, "loss_breakdown/lm_loss": 8.837222594593186e-06, "loss_breakdown/pointer_loss": 0.16376519203186035, "step": 4630 }, { "epoch": 0.47214184650291263, "loss_breakdown/lm_loss": 1.7272415789193474e-05, "loss_breakdown/pointer_loss": 0.09550170600414276, "step": 4630 }, { "epoch": 0.47214184650291263, "loss_breakdown/lm_loss": 1.0573625331744552e-05, "loss_breakdown/pointer_loss": 0.16111882030963898, "step": 4630 }, { "epoch": 0.47214184650291263, "loss_breakdown/lm_loss": 1.2969587260158733e-05, "loss_breakdown/pointer_loss": 0.19339974224567413, "step": 4630 }, { "epoch": 0.47214184650291263, "loss_breakdown/lm_loss": 2.1682957594748586e-05, "loss_breakdown/pointer_loss": 2.6631064414978027, "step": 4630 }, { "epoch": 0.47214184650291263, "loss_breakdown/lm_loss": 1.2465009604056831e-05, "loss_breakdown/pointer_loss": 2.1809823513031006, "step": 4630 }, { "epoch": 0.47214184650291263, "loss_breakdown/lm_loss": 1.4550968444382306e-05, "loss_breakdown/pointer_loss": 0.26981037855148315, "step": 4630 }, { "epoch": 0.4731615913117742, "grad_norm": 3.385246996009479, "learning_rate": 2.9274787535410764e-06, "loss": 0.2947, "step": 4640 }, { "epoch": 0.4731615913117742, "loss_breakdown/lm_loss": 7.769847798044793e-06, "loss_breakdown/pointer_loss": 0.21334660053253174, "step": 4640 }, { "epoch": 0.4731615913117742, "loss_breakdown/lm_loss": 2.0212453819112852e-05, "loss_breakdown/pointer_loss": 0.5005118250846863, "step": 4640 }, { "epoch": 0.4731615913117742, "loss_breakdown/lm_loss": 6.7948362811875995e-06, "loss_breakdown/pointer_loss": 0.4698760211467743, "step": 4640 }, { "epoch": 0.4731615913117742, "loss_breakdown/lm_loss": 9.04199987417087e-06, "loss_breakdown/pointer_loss": 0.8600171804428101, "step": 4640 }, { "epoch": 0.4731615913117742, "loss_breakdown/lm_loss": 5.60993703402346e-06, "loss_breakdown/pointer_loss": 0.2687014937400818, "step": 4640 }, { "epoch": 0.4731615913117742, "loss_breakdown/lm_loss": 7.765514965285547e-06, "loss_breakdown/pointer_loss": 0.1983136236667633, "step": 4640 }, { "epoch": 0.4731615913117742, "loss_breakdown/lm_loss": 7.63988009566674e-06, "loss_breakdown/pointer_loss": 1.4660849571228027, "step": 4640 }, { "epoch": 0.4731615913117742, "loss_breakdown/lm_loss": 9.00003760762047e-06, "loss_breakdown/pointer_loss": 0.27484768629074097, "step": 4640 }, { "epoch": 0.4741813361206358, "grad_norm": 22.56643988948051, "learning_rate": 2.9218130311614734e-06, "loss": 0.2707, "step": 4650 }, { "epoch": 0.4741813361206358, "loss_breakdown/lm_loss": 2.0397455955389887e-05, "loss_breakdown/pointer_loss": 1.311678171157837, "step": 4650 }, { "epoch": 0.4741813361206358, "loss_breakdown/lm_loss": 1.4477347576757893e-05, "loss_breakdown/pointer_loss": 0.5119734406471252, "step": 4650 }, { "epoch": 0.4741813361206358, "loss_breakdown/lm_loss": 1.891605461423751e-05, "loss_breakdown/pointer_loss": 0.7529942393302917, "step": 4650 }, { "epoch": 0.4741813361206358, "loss_breakdown/lm_loss": 1.0645513611962087e-05, "loss_breakdown/pointer_loss": 0.89393150806427, "step": 4650 }, { "epoch": 0.4741813361206358, "loss_breakdown/lm_loss": 9.101690011448227e-06, "loss_breakdown/pointer_loss": 0.3181954026222229, "step": 4650 }, { "epoch": 0.4741813361206358, "loss_breakdown/lm_loss": 1.0692891919461545e-05, "loss_breakdown/pointer_loss": 0.4190821647644043, "step": 4650 }, { "epoch": 0.4741813361206358, "loss_breakdown/lm_loss": 1.0164956620428711e-05, "loss_breakdown/pointer_loss": 0.5089112520217896, "step": 4650 }, { "epoch": 0.4741813361206358, "loss_breakdown/lm_loss": 7.893740075815003e-06, "loss_breakdown/pointer_loss": 0.7740600109100342, "step": 4650 }, { "epoch": 0.47520108092949737, "grad_norm": 7.2737947972084225, "learning_rate": 2.91614730878187e-06, "loss": 0.2724, "step": 4660 }, { "epoch": 0.47520108092949737, "loss_breakdown/lm_loss": 8.520645678800065e-06, "loss_breakdown/pointer_loss": 0.05704888328909874, "step": 4660 }, { "epoch": 0.47520108092949737, "loss_breakdown/lm_loss": 7.706120413786266e-06, "loss_breakdown/pointer_loss": 0.7006993293762207, "step": 4660 }, { "epoch": 0.47520108092949737, "loss_breakdown/lm_loss": 9.095498171518557e-06, "loss_breakdown/pointer_loss": 0.5878594517707825, "step": 4660 }, { "epoch": 0.47520108092949737, "loss_breakdown/lm_loss": 1.2676888218265958e-05, "loss_breakdown/pointer_loss": 0.3646979331970215, "step": 4660 }, { "epoch": 0.47520108092949737, "loss_breakdown/lm_loss": 1.027558755595237e-05, "loss_breakdown/pointer_loss": 0.07677101343870163, "step": 4660 }, { "epoch": 0.47520108092949737, "loss_breakdown/lm_loss": 8.140556019498035e-06, "loss_breakdown/pointer_loss": 0.41607797145843506, "step": 4660 }, { "epoch": 0.47520108092949737, "loss_breakdown/lm_loss": 1.0786814527818933e-05, "loss_breakdown/pointer_loss": 0.2049819827079773, "step": 4660 }, { "epoch": 0.47520108092949737, "loss_breakdown/lm_loss": 1.0839875358215068e-05, "loss_breakdown/pointer_loss": 0.1794845163822174, "step": 4660 }, { "epoch": 0.476220825738359, "grad_norm": 2.3557526792675967, "learning_rate": 2.9104815864022668e-06, "loss": 0.2906, "step": 4670 }, { "epoch": 0.476220825738359, "loss_breakdown/lm_loss": 1.2363691894279327e-05, "loss_breakdown/pointer_loss": 0.48723435401916504, "step": 4670 }, { "epoch": 0.476220825738359, "loss_breakdown/lm_loss": 9.236651749233715e-06, "loss_breakdown/pointer_loss": 0.3267253637313843, "step": 4670 }, { "epoch": 0.476220825738359, "loss_breakdown/lm_loss": 1.0973883036058396e-05, "loss_breakdown/pointer_loss": 0.3307293653488159, "step": 4670 }, { "epoch": 0.476220825738359, "loss_breakdown/lm_loss": 9.543220585328527e-06, "loss_breakdown/pointer_loss": 0.5002116560935974, "step": 4670 }, { "epoch": 0.476220825738359, "loss_breakdown/lm_loss": 7.887560059316456e-06, "loss_breakdown/pointer_loss": 0.5429202318191528, "step": 4670 }, { "epoch": 0.476220825738359, "loss_breakdown/lm_loss": 1.3118519746058155e-05, "loss_breakdown/pointer_loss": 1.0110770463943481, "step": 4670 }, { "epoch": 0.476220825738359, "loss_breakdown/lm_loss": 7.128626748453826e-06, "loss_breakdown/pointer_loss": 0.4428899586200714, "step": 4670 }, { "epoch": 0.476220825738359, "loss_breakdown/lm_loss": 7.36156243874575e-06, "loss_breakdown/pointer_loss": 0.3125361204147339, "step": 4670 }, { "epoch": 0.4772405705472206, "grad_norm": 5.8168853580664965, "learning_rate": 2.9048158640226633e-06, "loss": 0.2604, "step": 4680 }, { "epoch": 0.4772405705472206, "loss_breakdown/lm_loss": 1.0696864592318889e-05, "loss_breakdown/pointer_loss": 0.31014469265937805, "step": 4680 }, { "epoch": 0.4772405705472206, "loss_breakdown/lm_loss": 1.1054459719161969e-05, "loss_breakdown/pointer_loss": 0.35760262608528137, "step": 4680 }, { "epoch": 0.4772405705472206, "loss_breakdown/lm_loss": 1.3609305824502371e-05, "loss_breakdown/pointer_loss": 0.13564686477184296, "step": 4680 }, { "epoch": 0.4772405705472206, "loss_breakdown/lm_loss": 3.0662289645988494e-05, "loss_breakdown/pointer_loss": 0.18744608759880066, "step": 4680 }, { "epoch": 0.4772405705472206, "loss_breakdown/lm_loss": 2.4783596018096432e-05, "loss_breakdown/pointer_loss": 2.020348072052002, "step": 4680 }, { "epoch": 0.4772405705472206, "loss_breakdown/lm_loss": 9.059769581654109e-06, "loss_breakdown/pointer_loss": 0.37639692425727844, "step": 4680 }, { "epoch": 0.4772405705472206, "loss_breakdown/lm_loss": 9.075656635104679e-06, "loss_breakdown/pointer_loss": 0.5274702906608582, "step": 4680 }, { "epoch": 0.4772405705472206, "loss_breakdown/lm_loss": 1.3116461559548043e-05, "loss_breakdown/pointer_loss": 4.133236885070801, "step": 4680 }, { "epoch": 0.47826031535608216, "grad_norm": 3.3688961716902663, "learning_rate": 2.8991501416430594e-06, "loss": 0.2934, "step": 4690 }, { "epoch": 0.47826031535608216, "loss_breakdown/lm_loss": 1.0370083145971876e-05, "loss_breakdown/pointer_loss": 0.0879393070936203, "step": 4690 }, { "epoch": 0.47826031535608216, "loss_breakdown/lm_loss": 7.9884875958669e-06, "loss_breakdown/pointer_loss": 0.8252277374267578, "step": 4690 }, { "epoch": 0.47826031535608216, "loss_breakdown/lm_loss": 7.919775271147955e-06, "loss_breakdown/pointer_loss": 0.5152530074119568, "step": 4690 }, { "epoch": 0.47826031535608216, "loss_breakdown/lm_loss": 1.1839546459668782e-05, "loss_breakdown/pointer_loss": 0.2857920825481415, "step": 4690 }, { "epoch": 0.47826031535608216, "loss_breakdown/lm_loss": 9.109604434343055e-06, "loss_breakdown/pointer_loss": 0.438822478055954, "step": 4690 }, { "epoch": 0.47826031535608216, "loss_breakdown/lm_loss": 7.977372661116533e-06, "loss_breakdown/pointer_loss": 0.3698241412639618, "step": 4690 }, { "epoch": 0.47826031535608216, "loss_breakdown/lm_loss": 7.738800377410371e-06, "loss_breakdown/pointer_loss": 1.0933558940887451, "step": 4690 }, { "epoch": 0.47826031535608216, "loss_breakdown/lm_loss": 8.058415005507413e-06, "loss_breakdown/pointer_loss": 0.07085156440734863, "step": 4690 }, { "epoch": 0.47928006016494373, "grad_norm": 10.152472170562604, "learning_rate": 2.8934844192634563e-06, "loss": 0.2645, "step": 4700 }, { "epoch": 0.47928006016494373, "loss_breakdown/lm_loss": 2.8248179660295136e-05, "loss_breakdown/pointer_loss": 1.0208346843719482, "step": 4700 }, { "epoch": 0.47928006016494373, "loss_breakdown/lm_loss": 2.155824586225208e-05, "loss_breakdown/pointer_loss": 1.4728655815124512, "step": 4700 }, { "epoch": 0.47928006016494373, "loss_breakdown/lm_loss": 1.2437542864063289e-05, "loss_breakdown/pointer_loss": 0.34644627571105957, "step": 4700 }, { "epoch": 0.47928006016494373, "loss_breakdown/lm_loss": 1.1884292689501308e-05, "loss_breakdown/pointer_loss": 0.39337217807769775, "step": 4700 }, { "epoch": 0.47928006016494373, "loss_breakdown/lm_loss": 1.4305424883787055e-05, "loss_breakdown/pointer_loss": 0.8071560263633728, "step": 4700 }, { "epoch": 0.47928006016494373, "loss_breakdown/lm_loss": 1.3021568520343862e-05, "loss_breakdown/pointer_loss": 0.3417477011680603, "step": 4700 }, { "epoch": 0.47928006016494373, "loss_breakdown/lm_loss": 1.3872064300812781e-05, "loss_breakdown/pointer_loss": 0.32911181449890137, "step": 4700 }, { "epoch": 0.47928006016494373, "loss_breakdown/lm_loss": 1.187159250548575e-05, "loss_breakdown/pointer_loss": 0.32228612899780273, "step": 4700 }, { "epoch": 0.4802998049738053, "grad_norm": 4.373349239596266, "learning_rate": 2.887818696883853e-06, "loss": 0.271, "step": 4710 }, { "epoch": 0.4802998049738053, "loss_breakdown/lm_loss": 8.437563337793108e-06, "loss_breakdown/pointer_loss": 0.11936497688293457, "step": 4710 }, { "epoch": 0.4802998049738053, "loss_breakdown/lm_loss": 7.402796200040029e-06, "loss_breakdown/pointer_loss": 0.47991156578063965, "step": 4710 }, { "epoch": 0.4802998049738053, "loss_breakdown/lm_loss": 9.597817552275956e-06, "loss_breakdown/pointer_loss": 0.20405682921409607, "step": 4710 }, { "epoch": 0.4802998049738053, "loss_breakdown/lm_loss": 1.0394856872153468e-05, "loss_breakdown/pointer_loss": 0.18958111107349396, "step": 4710 }, { "epoch": 0.4802998049738053, "loss_breakdown/lm_loss": 7.621353233844275e-06, "loss_breakdown/pointer_loss": 2.115746259689331, "step": 4710 }, { "epoch": 0.4802998049738053, "loss_breakdown/lm_loss": 1.0780043339764234e-05, "loss_breakdown/pointer_loss": 0.2974041700363159, "step": 4710 }, { "epoch": 0.4802998049738053, "loss_breakdown/lm_loss": 1.1133907719340641e-05, "loss_breakdown/pointer_loss": 2.3108580112457275, "step": 4710 }, { "epoch": 0.4802998049738053, "loss_breakdown/lm_loss": 1.5133909982978366e-05, "loss_breakdown/pointer_loss": 0.5113238096237183, "step": 4710 }, { "epoch": 0.4813195497826669, "grad_norm": 5.312068786014605, "learning_rate": 2.8821529745042497e-06, "loss": 0.3068, "step": 4720 }, { "epoch": 0.4813195497826669, "loss_breakdown/lm_loss": 1.3954376299807336e-05, "loss_breakdown/pointer_loss": 0.4311307668685913, "step": 4720 }, { "epoch": 0.4813195497826669, "loss_breakdown/lm_loss": 9.457088708586525e-06, "loss_breakdown/pointer_loss": 0.38728561997413635, "step": 4720 }, { "epoch": 0.4813195497826669, "loss_breakdown/lm_loss": 1.0074174497276545e-05, "loss_breakdown/pointer_loss": 0.10654912889003754, "step": 4720 }, { "epoch": 0.4813195497826669, "loss_breakdown/lm_loss": 1.1440811249485705e-05, "loss_breakdown/pointer_loss": 0.4897712469100952, "step": 4720 }, { "epoch": 0.4813195497826669, "loss_breakdown/lm_loss": 9.309681445301976e-06, "loss_breakdown/pointer_loss": 0.5503951907157898, "step": 4720 }, { "epoch": 0.4813195497826669, "loss_breakdown/lm_loss": 1.0140431186300702e-05, "loss_breakdown/pointer_loss": 0.4819314479827881, "step": 4720 }, { "epoch": 0.4813195497826669, "loss_breakdown/lm_loss": 1.6404486814280972e-05, "loss_breakdown/pointer_loss": 0.5717886090278625, "step": 4720 }, { "epoch": 0.4813195497826669, "loss_breakdown/lm_loss": 9.644777492212597e-06, "loss_breakdown/pointer_loss": 0.7996417880058289, "step": 4720 }, { "epoch": 0.48233929459152847, "grad_norm": 6.168271338848254, "learning_rate": 2.8764872521246462e-06, "loss": 0.257, "step": 4730 }, { "epoch": 0.48233929459152847, "loss_breakdown/lm_loss": 1.0994650438078679e-05, "loss_breakdown/pointer_loss": 0.14596766233444214, "step": 4730 }, { "epoch": 0.48233929459152847, "loss_breakdown/lm_loss": 1.0831917279574554e-05, "loss_breakdown/pointer_loss": 0.8165168762207031, "step": 4730 }, { "epoch": 0.48233929459152847, "loss_breakdown/lm_loss": 9.608123036741745e-06, "loss_breakdown/pointer_loss": 0.20236288011074066, "step": 4730 }, { "epoch": 0.48233929459152847, "loss_breakdown/lm_loss": 2.642076651682146e-05, "loss_breakdown/pointer_loss": 0.11208824813365936, "step": 4730 }, { "epoch": 0.48233929459152847, "loss_breakdown/lm_loss": 1.4884680240356829e-05, "loss_breakdown/pointer_loss": 0.6046898365020752, "step": 4730 }, { "epoch": 0.48233929459152847, "loss_breakdown/lm_loss": 1.0363100045651663e-05, "loss_breakdown/pointer_loss": 0.13803991675376892, "step": 4730 }, { "epoch": 0.48233929459152847, "loss_breakdown/lm_loss": 7.291546808119165e-06, "loss_breakdown/pointer_loss": 0.07523751258850098, "step": 4730 }, { "epoch": 0.48233929459152847, "loss_breakdown/lm_loss": 1.3136525922163855e-05, "loss_breakdown/pointer_loss": 0.4285409450531006, "step": 4730 }, { "epoch": 0.48335903940039004, "grad_norm": 2.8522262157550418, "learning_rate": 2.8708215297450427e-06, "loss": 0.2962, "step": 4740 }, { "epoch": 0.48335903940039004, "loss_breakdown/lm_loss": 1.559036536491476e-05, "loss_breakdown/pointer_loss": 0.1855057030916214, "step": 4740 }, { "epoch": 0.48335903940039004, "loss_breakdown/lm_loss": 1.154295023297891e-05, "loss_breakdown/pointer_loss": 0.7094252109527588, "step": 4740 }, { "epoch": 0.48335903940039004, "loss_breakdown/lm_loss": 7.275078587554162e-06, "loss_breakdown/pointer_loss": 0.14138711988925934, "step": 4740 }, { "epoch": 0.48335903940039004, "loss_breakdown/lm_loss": 1.1284012543910649e-05, "loss_breakdown/pointer_loss": 0.30922120809555054, "step": 4740 }, { "epoch": 0.48335903940039004, "loss_breakdown/lm_loss": 1.1005898159055505e-05, "loss_breakdown/pointer_loss": 0.28281962871551514, "step": 4740 }, { "epoch": 0.48335903940039004, "loss_breakdown/lm_loss": 9.550483810016885e-06, "loss_breakdown/pointer_loss": 0.3264639377593994, "step": 4740 }, { "epoch": 0.48335903940039004, "loss_breakdown/lm_loss": 1.0486901373951696e-05, "loss_breakdown/pointer_loss": 0.26298654079437256, "step": 4740 }, { "epoch": 0.48335903940039004, "loss_breakdown/lm_loss": 7.404115422104951e-06, "loss_breakdown/pointer_loss": 0.4081732928752899, "step": 4740 }, { "epoch": 0.4843787842092516, "grad_norm": 10.012430741059724, "learning_rate": 2.8651558073654397e-06, "loss": 0.2745, "step": 4750 }, { "epoch": 0.4843787842092516, "loss_breakdown/lm_loss": 4.916563557344489e-05, "loss_breakdown/pointer_loss": 1.8250395059585571, "step": 4750 }, { "epoch": 0.4843787842092516, "loss_breakdown/lm_loss": 3.248765642638318e-05, "loss_breakdown/pointer_loss": 1.024944543838501, "step": 4750 }, { "epoch": 0.4843787842092516, "loss_breakdown/lm_loss": 2.27818927669432e-05, "loss_breakdown/pointer_loss": 0.6527916789054871, "step": 4750 }, { "epoch": 0.4843787842092516, "loss_breakdown/lm_loss": 1.8220192941953428e-05, "loss_breakdown/pointer_loss": 0.6337761878967285, "step": 4750 }, { "epoch": 0.4843787842092516, "loss_breakdown/lm_loss": 1.3013313946430571e-05, "loss_breakdown/pointer_loss": 0.4530542492866516, "step": 4750 }, { "epoch": 0.4843787842092516, "loss_breakdown/lm_loss": 1.596484253241215e-05, "loss_breakdown/pointer_loss": 0.4331340193748474, "step": 4750 }, { "epoch": 0.4843787842092516, "loss_breakdown/lm_loss": 2.6231880838167854e-05, "loss_breakdown/pointer_loss": 0.5603064298629761, "step": 4750 }, { "epoch": 0.4843787842092516, "loss_breakdown/lm_loss": 8.706676453584805e-06, "loss_breakdown/pointer_loss": 0.8039088249206543, "step": 4750 }, { "epoch": 0.4853985290181132, "grad_norm": 4.937795778420565, "learning_rate": 2.8594900849858357e-06, "loss": 0.2716, "step": 4760 }, { "epoch": 0.4853985290181132, "loss_breakdown/lm_loss": 7.512059710279573e-06, "loss_breakdown/pointer_loss": 0.3984402120113373, "step": 4760 }, { "epoch": 0.4853985290181132, "loss_breakdown/lm_loss": 8.096181772998534e-06, "loss_breakdown/pointer_loss": 0.1332111358642578, "step": 4760 }, { "epoch": 0.4853985290181132, "loss_breakdown/lm_loss": 9.333361049357336e-06, "loss_breakdown/pointer_loss": 0.20619089901447296, "step": 4760 }, { "epoch": 0.4853985290181132, "loss_breakdown/lm_loss": 1.0498087249288801e-05, "loss_breakdown/pointer_loss": 0.1775372326374054, "step": 4760 }, { "epoch": 0.4853985290181132, "loss_breakdown/lm_loss": 1.9306457033962943e-05, "loss_breakdown/pointer_loss": 0.5151623487472534, "step": 4760 }, { "epoch": 0.4853985290181132, "loss_breakdown/lm_loss": 1.1022451872122474e-05, "loss_breakdown/pointer_loss": 0.44841325283050537, "step": 4760 }, { "epoch": 0.4853985290181132, "loss_breakdown/lm_loss": 1.0017374734161422e-05, "loss_breakdown/pointer_loss": 0.17513665556907654, "step": 4760 }, { "epoch": 0.4853985290181132, "loss_breakdown/lm_loss": 1.1102164535259362e-05, "loss_breakdown/pointer_loss": 0.5311964750289917, "step": 4760 }, { "epoch": 0.4864182738269748, "grad_norm": 4.923715817972666, "learning_rate": 2.8538243626062322e-06, "loss": 0.2847, "step": 4770 }, { "epoch": 0.4864182738269748, "loss_breakdown/lm_loss": 1.033476564771263e-05, "loss_breakdown/pointer_loss": 0.3905180096626282, "step": 4770 }, { "epoch": 0.4864182738269748, "loss_breakdown/lm_loss": 1.2234875612193719e-05, "loss_breakdown/pointer_loss": 0.4436926245689392, "step": 4770 }, { "epoch": 0.4864182738269748, "loss_breakdown/lm_loss": 9.597243661119137e-06, "loss_breakdown/pointer_loss": 0.7011665105819702, "step": 4770 }, { "epoch": 0.4864182738269748, "loss_breakdown/lm_loss": 8.901191904442385e-06, "loss_breakdown/pointer_loss": 0.553683340549469, "step": 4770 }, { "epoch": 0.4864182738269748, "loss_breakdown/lm_loss": 9.080041309061926e-06, "loss_breakdown/pointer_loss": 0.3372901976108551, "step": 4770 }, { "epoch": 0.4864182738269748, "loss_breakdown/lm_loss": 7.63039497542195e-06, "loss_breakdown/pointer_loss": 0.8525644540786743, "step": 4770 }, { "epoch": 0.4864182738269748, "loss_breakdown/lm_loss": 1.0924460184469353e-05, "loss_breakdown/pointer_loss": 0.47236838936805725, "step": 4770 }, { "epoch": 0.4864182738269748, "loss_breakdown/lm_loss": 1.1936453120142687e-05, "loss_breakdown/pointer_loss": 0.33763545751571655, "step": 4770 }, { "epoch": 0.48743801863583636, "grad_norm": 13.646307157891114, "learning_rate": 2.848158640226629e-06, "loss": 0.2812, "step": 4780 }, { "epoch": 0.48743801863583636, "loss_breakdown/lm_loss": 1.2985487956029829e-05, "loss_breakdown/pointer_loss": 0.08455376327037811, "step": 4780 }, { "epoch": 0.48743801863583636, "loss_breakdown/lm_loss": 1.548865475342609e-05, "loss_breakdown/pointer_loss": 0.3396705389022827, "step": 4780 }, { "epoch": 0.48743801863583636, "loss_breakdown/lm_loss": 1.5258308849297464e-05, "loss_breakdown/pointer_loss": 0.198540598154068, "step": 4780 }, { "epoch": 0.48743801863583636, "loss_breakdown/lm_loss": 2.0918627342325635e-05, "loss_breakdown/pointer_loss": 0.21108579635620117, "step": 4780 }, { "epoch": 0.48743801863583636, "loss_breakdown/lm_loss": 6.921993644937174e-06, "loss_breakdown/pointer_loss": 0.0484006330370903, "step": 4780 }, { "epoch": 0.48743801863583636, "loss_breakdown/lm_loss": 1.2142930245317984e-05, "loss_breakdown/pointer_loss": 2.161538600921631, "step": 4780 }, { "epoch": 0.48743801863583636, "loss_breakdown/lm_loss": 1.8487207853468135e-05, "loss_breakdown/pointer_loss": 0.06913016736507416, "step": 4780 }, { "epoch": 0.48743801863583636, "loss_breakdown/lm_loss": 1.1566974535526242e-05, "loss_breakdown/pointer_loss": 0.1556767225265503, "step": 4780 }, { "epoch": 0.488457763444698, "grad_norm": 5.833767029893621, "learning_rate": 2.8424929178470257e-06, "loss": 0.279, "step": 4790 }, { "epoch": 0.488457763444698, "loss_breakdown/lm_loss": 7.233906671899604e-06, "loss_breakdown/pointer_loss": 0.2618902325630188, "step": 4790 }, { "epoch": 0.488457763444698, "loss_breakdown/lm_loss": 8.811238330963533e-06, "loss_breakdown/pointer_loss": 0.24029706418514252, "step": 4790 }, { "epoch": 0.488457763444698, "loss_breakdown/lm_loss": 1.7755295630195178e-05, "loss_breakdown/pointer_loss": 0.389286071062088, "step": 4790 }, { "epoch": 0.488457763444698, "loss_breakdown/lm_loss": 7.832717528799549e-06, "loss_breakdown/pointer_loss": 0.16114655137062073, "step": 4790 }, { "epoch": 0.488457763444698, "loss_breakdown/lm_loss": 7.3597507252998184e-06, "loss_breakdown/pointer_loss": 0.31007498502731323, "step": 4790 }, { "epoch": 0.488457763444698, "loss_breakdown/lm_loss": 7.401258699246682e-06, "loss_breakdown/pointer_loss": 0.3266524076461792, "step": 4790 }, { "epoch": 0.488457763444698, "loss_breakdown/lm_loss": 1.0976233170367777e-05, "loss_breakdown/pointer_loss": 0.3617108464241028, "step": 4790 }, { "epoch": 0.488457763444698, "loss_breakdown/lm_loss": 9.196869541483466e-06, "loss_breakdown/pointer_loss": 0.23701296746730804, "step": 4790 }, { "epoch": 0.48947750825355957, "grad_norm": 7.700599596238437, "learning_rate": 2.8368271954674226e-06, "loss": 0.2706, "step": 4800 }, { "epoch": 0.48947750825355957, "loss_breakdown/lm_loss": 3.896166163031012e-05, "loss_breakdown/pointer_loss": 3.5381431579589844, "step": 4800 }, { "epoch": 0.48947750825355957, "loss_breakdown/lm_loss": 1.4295941582531668e-05, "loss_breakdown/pointer_loss": 0.6235730051994324, "step": 4800 }, { "epoch": 0.48947750825355957, "loss_breakdown/lm_loss": 1.6083566151792184e-05, "loss_breakdown/pointer_loss": 0.6729743480682373, "step": 4800 }, { "epoch": 0.48947750825355957, "loss_breakdown/lm_loss": 1.5634366718586534e-05, "loss_breakdown/pointer_loss": 0.4731626808643341, "step": 4800 }, { "epoch": 0.48947750825355957, "loss_breakdown/lm_loss": 8.175409675459377e-06, "loss_breakdown/pointer_loss": 0.7696095108985901, "step": 4800 }, { "epoch": 0.48947750825355957, "loss_breakdown/lm_loss": 1.2485401384765282e-05, "loss_breakdown/pointer_loss": 1.0225201845169067, "step": 4800 }, { "epoch": 0.48947750825355957, "loss_breakdown/lm_loss": 1.0748840395535808e-05, "loss_breakdown/pointer_loss": 0.5252035856246948, "step": 4800 }, { "epoch": 0.48947750825355957, "loss_breakdown/lm_loss": 1.427386359864613e-05, "loss_breakdown/pointer_loss": 0.7424025535583496, "step": 4800 }, { "epoch": 0.49049725306242115, "grad_norm": 7.636135696844726, "learning_rate": 2.831161473087819e-06, "loss": 0.2689, "step": 4810 }, { "epoch": 0.49049725306242115, "loss_breakdown/lm_loss": 9.357161616208032e-06, "loss_breakdown/pointer_loss": 0.06480133533477783, "step": 4810 }, { "epoch": 0.49049725306242115, "loss_breakdown/lm_loss": 1.1229265510337427e-05, "loss_breakdown/pointer_loss": 0.07814022898674011, "step": 4810 }, { "epoch": 0.49049725306242115, "loss_breakdown/lm_loss": 1.336169498244999e-05, "loss_breakdown/pointer_loss": 0.174484521150589, "step": 4810 }, { "epoch": 0.49049725306242115, "loss_breakdown/lm_loss": 8.034577149373945e-06, "loss_breakdown/pointer_loss": 0.043262675404548645, "step": 4810 }, { "epoch": 0.49049725306242115, "loss_breakdown/lm_loss": 1.0900820598180871e-05, "loss_breakdown/pointer_loss": 1.4836804866790771, "step": 4810 }, { "epoch": 0.49049725306242115, "loss_breakdown/lm_loss": 1.9478000467643142e-05, "loss_breakdown/pointer_loss": 0.18815213441848755, "step": 4810 }, { "epoch": 0.49049725306242115, "loss_breakdown/lm_loss": 7.323317277041497e-06, "loss_breakdown/pointer_loss": 0.09004924446344376, "step": 4810 }, { "epoch": 0.49049725306242115, "loss_breakdown/lm_loss": 9.573690476827323e-06, "loss_breakdown/pointer_loss": 0.5922212600708008, "step": 4810 }, { "epoch": 0.4915169978712827, "grad_norm": 5.26531751874461, "learning_rate": 2.825495750708215e-06, "loss": 0.3107, "step": 4820 }, { "epoch": 0.4915169978712827, "loss_breakdown/lm_loss": 8.285768672067206e-06, "loss_breakdown/pointer_loss": 0.13251759111881256, "step": 4820 }, { "epoch": 0.4915169978712827, "loss_breakdown/lm_loss": 7.343182915064972e-06, "loss_breakdown/pointer_loss": 0.1920664757490158, "step": 4820 }, { "epoch": 0.4915169978712827, "loss_breakdown/lm_loss": 8.072001037362497e-06, "loss_breakdown/pointer_loss": 0.4143887162208557, "step": 4820 }, { "epoch": 0.4915169978712827, "loss_breakdown/lm_loss": 9.304893865191843e-06, "loss_breakdown/pointer_loss": 0.15128040313720703, "step": 4820 }, { "epoch": 0.4915169978712827, "loss_breakdown/lm_loss": 1.1215979611733928e-05, "loss_breakdown/pointer_loss": 0.7158183455467224, "step": 4820 }, { "epoch": 0.4915169978712827, "loss_breakdown/lm_loss": 7.858830940676853e-06, "loss_breakdown/pointer_loss": 0.31939810514450073, "step": 4820 }, { "epoch": 0.4915169978712827, "loss_breakdown/lm_loss": 1.4424360415432602e-05, "loss_breakdown/pointer_loss": 1.1364432573318481, "step": 4820 }, { "epoch": 0.4915169978712827, "loss_breakdown/lm_loss": 8.89101647771895e-06, "loss_breakdown/pointer_loss": 0.38968539237976074, "step": 4820 }, { "epoch": 0.4925367426801443, "grad_norm": 10.247755528130936, "learning_rate": 2.819830028328612e-06, "loss": 0.2746, "step": 4830 }, { "epoch": 0.4925367426801443, "loss_breakdown/lm_loss": 2.9576392989838496e-05, "loss_breakdown/pointer_loss": 0.24585986137390137, "step": 4830 }, { "epoch": 0.4925367426801443, "loss_breakdown/lm_loss": 1.1245240784774069e-05, "loss_breakdown/pointer_loss": 0.3015540838241577, "step": 4830 }, { "epoch": 0.4925367426801443, "loss_breakdown/lm_loss": 9.234595381713007e-06, "loss_breakdown/pointer_loss": 0.3755118250846863, "step": 4830 }, { "epoch": 0.4925367426801443, "loss_breakdown/lm_loss": 9.182962457998656e-06, "loss_breakdown/pointer_loss": 0.24830597639083862, "step": 4830 }, { "epoch": 0.4925367426801443, "loss_breakdown/lm_loss": 1.4348313015943859e-05, "loss_breakdown/pointer_loss": 0.23211686313152313, "step": 4830 }, { "epoch": 0.4925367426801443, "loss_breakdown/lm_loss": 1.021206389850704e-05, "loss_breakdown/pointer_loss": 0.13368241488933563, "step": 4830 }, { "epoch": 0.4925367426801443, "loss_breakdown/lm_loss": 1.8321563402423635e-05, "loss_breakdown/pointer_loss": 0.10053227841854095, "step": 4830 }, { "epoch": 0.4925367426801443, "loss_breakdown/lm_loss": 1.0931290489679668e-05, "loss_breakdown/pointer_loss": 0.08013392239809036, "step": 4830 }, { "epoch": 0.4935564874890059, "grad_norm": 7.61430490993628, "learning_rate": 2.8141643059490086e-06, "loss": 0.2947, "step": 4840 }, { "epoch": 0.4935564874890059, "loss_breakdown/lm_loss": 9.220922038366552e-06, "loss_breakdown/pointer_loss": 0.3731139898300171, "step": 4840 }, { "epoch": 0.4935564874890059, "loss_breakdown/lm_loss": 8.271872502518818e-06, "loss_breakdown/pointer_loss": 0.432558536529541, "step": 4840 }, { "epoch": 0.4935564874890059, "loss_breakdown/lm_loss": 7.735246072115842e-06, "loss_breakdown/pointer_loss": 0.3579091727733612, "step": 4840 }, { "epoch": 0.4935564874890059, "loss_breakdown/lm_loss": 1.0083304914587643e-05, "loss_breakdown/pointer_loss": 0.22094443440437317, "step": 4840 }, { "epoch": 0.4935564874890059, "loss_breakdown/lm_loss": 6.462389137595892e-06, "loss_breakdown/pointer_loss": 0.2630944848060608, "step": 4840 }, { "epoch": 0.4935564874890059, "loss_breakdown/lm_loss": 7.688231562497094e-06, "loss_breakdown/pointer_loss": 0.2988929748535156, "step": 4840 }, { "epoch": 0.4935564874890059, "loss_breakdown/lm_loss": 1.0223792742181104e-05, "loss_breakdown/pointer_loss": 0.28978827595710754, "step": 4840 }, { "epoch": 0.4935564874890059, "loss_breakdown/lm_loss": 8.182943929568864e-06, "loss_breakdown/pointer_loss": 0.6470600962638855, "step": 4840 }, { "epoch": 0.49457623229786746, "grad_norm": 120.09710465377137, "learning_rate": 2.808498583569405e-06, "loss": 0.2545, "step": 4850 }, { "epoch": 0.49457623229786746, "loss_breakdown/lm_loss": 3.0456538297585212e-05, "loss_breakdown/pointer_loss": 1.760310411453247, "step": 4850 }, { "epoch": 0.49457623229786746, "loss_breakdown/lm_loss": 3.322209886391647e-05, "loss_breakdown/pointer_loss": 0.8066635131835938, "step": 4850 }, { "epoch": 0.49457623229786746, "loss_breakdown/lm_loss": 2.10933339985786e-05, "loss_breakdown/pointer_loss": 0.7356910109519958, "step": 4850 }, { "epoch": 0.49457623229786746, "loss_breakdown/lm_loss": 3.3090560464188457e-05, "loss_breakdown/pointer_loss": 1.3301417827606201, "step": 4850 }, { "epoch": 0.49457623229786746, "loss_breakdown/lm_loss": 1.889433406176977e-05, "loss_breakdown/pointer_loss": 0.5726362466812134, "step": 4850 }, { "epoch": 0.49457623229786746, "loss_breakdown/lm_loss": 1.0884698895097245e-05, "loss_breakdown/pointer_loss": 0.44651681184768677, "step": 4850 }, { "epoch": 0.49457623229786746, "loss_breakdown/lm_loss": 1.2188272194180172e-05, "loss_breakdown/pointer_loss": 0.7527980208396912, "step": 4850 }, { "epoch": 0.49457623229786746, "loss_breakdown/lm_loss": 7.62276158638997e-06, "loss_breakdown/pointer_loss": 0.13368543982505798, "step": 4850 }, { "epoch": 0.49559597710672904, "grad_norm": 2.9386666665590613, "learning_rate": 2.802832861189802e-06, "loss": 0.2788, "step": 4860 }, { "epoch": 0.49559597710672904, "loss_breakdown/lm_loss": 7.075158919178648e-06, "loss_breakdown/pointer_loss": 0.09746745228767395, "step": 4860 }, { "epoch": 0.49559597710672904, "loss_breakdown/lm_loss": 7.818408448656555e-06, "loss_breakdown/pointer_loss": 0.10385160148143768, "step": 4860 }, { "epoch": 0.49559597710672904, "loss_breakdown/lm_loss": 8.697223165654577e-06, "loss_breakdown/pointer_loss": 0.1790601760149002, "step": 4860 }, { "epoch": 0.49559597710672904, "loss_breakdown/lm_loss": 8.609047654317692e-06, "loss_breakdown/pointer_loss": 1.0220341682434082, "step": 4860 }, { "epoch": 0.49559597710672904, "loss_breakdown/lm_loss": 8.769701707933564e-06, "loss_breakdown/pointer_loss": 0.5644235014915466, "step": 4860 }, { "epoch": 0.49559597710672904, "loss_breakdown/lm_loss": 1.0919352462224197e-05, "loss_breakdown/pointer_loss": 0.2312707006931305, "step": 4860 }, { "epoch": 0.49559597710672904, "loss_breakdown/lm_loss": 1.1327821084705647e-05, "loss_breakdown/pointer_loss": 0.8437052965164185, "step": 4860 }, { "epoch": 0.49559597710672904, "loss_breakdown/lm_loss": 7.1524509621667676e-06, "loss_breakdown/pointer_loss": 0.3833182454109192, "step": 4860 }, { "epoch": 0.4966157219155906, "grad_norm": 3.5508572995617222, "learning_rate": 2.7971671388101985e-06, "loss": 0.2903, "step": 4870 }, { "epoch": 0.4966157219155906, "loss_breakdown/lm_loss": 7.124825515347766e-06, "loss_breakdown/pointer_loss": 0.5429848432540894, "step": 4870 }, { "epoch": 0.4966157219155906, "loss_breakdown/lm_loss": 1.0378175829828251e-05, "loss_breakdown/pointer_loss": 0.4894283413887024, "step": 4870 }, { "epoch": 0.4966157219155906, "loss_breakdown/lm_loss": 1.5139958122745156e-05, "loss_breakdown/pointer_loss": 0.6521862149238586, "step": 4870 }, { "epoch": 0.4966157219155906, "loss_breakdown/lm_loss": 8.182046258298215e-06, "loss_breakdown/pointer_loss": 0.5870264172554016, "step": 4870 }, { "epoch": 0.4966157219155906, "loss_breakdown/lm_loss": 8.97180780157214e-06, "loss_breakdown/pointer_loss": 1.1098814010620117, "step": 4870 }, { "epoch": 0.4966157219155906, "loss_breakdown/lm_loss": 9.731210411700886e-06, "loss_breakdown/pointer_loss": 0.19650676846504211, "step": 4870 }, { "epoch": 0.4966157219155906, "loss_breakdown/lm_loss": 8.259365131380036e-06, "loss_breakdown/pointer_loss": 0.3618515729904175, "step": 4870 }, { "epoch": 0.4966157219155906, "loss_breakdown/lm_loss": 8.020143468456808e-06, "loss_breakdown/pointer_loss": 0.5963894128799438, "step": 4870 }, { "epoch": 0.4976354667244522, "grad_norm": 28.294616886624386, "learning_rate": 2.7915014164305955e-06, "loss": 0.2621, "step": 4880 }, { "epoch": 0.4976354667244522, "loss_breakdown/lm_loss": 8.014707418624312e-06, "loss_breakdown/pointer_loss": 1.5007619857788086, "step": 4880 }, { "epoch": 0.4976354667244522, "loss_breakdown/lm_loss": 6.854454568383517e-06, "loss_breakdown/pointer_loss": 2.9206161499023438, "step": 4880 }, { "epoch": 0.4976354667244522, "loss_breakdown/lm_loss": 7.529959475505166e-06, "loss_breakdown/pointer_loss": 0.14206568896770477, "step": 4880 }, { "epoch": 0.4976354667244522, "loss_breakdown/lm_loss": 1.0112762538483366e-05, "loss_breakdown/pointer_loss": 0.24955391883850098, "step": 4880 }, { "epoch": 0.4976354667244522, "loss_breakdown/lm_loss": 9.051794222614262e-06, "loss_breakdown/pointer_loss": 0.11083370447158813, "step": 4880 }, { "epoch": 0.4976354667244522, "loss_breakdown/lm_loss": 1.1841226296382956e-05, "loss_breakdown/pointer_loss": 0.03867419809103012, "step": 4880 }, { "epoch": 0.4976354667244522, "loss_breakdown/lm_loss": 1.074055126082385e-05, "loss_breakdown/pointer_loss": 0.42265260219573975, "step": 4880 }, { "epoch": 0.4976354667244522, "loss_breakdown/lm_loss": 1.141995562647935e-05, "loss_breakdown/pointer_loss": 0.24817344546318054, "step": 4880 }, { "epoch": 0.49865521153331377, "grad_norm": 2.9648132038063335, "learning_rate": 2.7858356940509916e-06, "loss": 0.2831, "step": 4890 }, { "epoch": 0.49865521153331377, "loss_breakdown/lm_loss": 8.762984180066269e-06, "loss_breakdown/pointer_loss": 0.18417666852474213, "step": 4890 }, { "epoch": 0.49865521153331377, "loss_breakdown/lm_loss": 1.9735216483240947e-05, "loss_breakdown/pointer_loss": 0.2352694571018219, "step": 4890 }, { "epoch": 0.49865521153331377, "loss_breakdown/lm_loss": 8.785565114521887e-06, "loss_breakdown/pointer_loss": 0.6500760316848755, "step": 4890 }, { "epoch": 0.49865521153331377, "loss_breakdown/lm_loss": 1.0629241842252668e-05, "loss_breakdown/pointer_loss": 0.347545325756073, "step": 4890 }, { "epoch": 0.49865521153331377, "loss_breakdown/lm_loss": 7.735827239230275e-06, "loss_breakdown/pointer_loss": 0.22452862560749054, "step": 4890 }, { "epoch": 0.49865521153331377, "loss_breakdown/lm_loss": 9.825212146097329e-06, "loss_breakdown/pointer_loss": 0.6396569013595581, "step": 4890 }, { "epoch": 0.49865521153331377, "loss_breakdown/lm_loss": 6.675640179309994e-06, "loss_breakdown/pointer_loss": 0.2451978623867035, "step": 4890 }, { "epoch": 0.49865521153331377, "loss_breakdown/lm_loss": 9.368352039018646e-06, "loss_breakdown/pointer_loss": 0.21378932893276215, "step": 4890 }, { "epoch": 0.49967495634217535, "grad_norm": 9.989846193685615, "learning_rate": 2.780169971671388e-06, "loss": 0.2892, "step": 4900 }, { "epoch": 0.49967495634217535, "loss_breakdown/lm_loss": 4.93848929181695e-05, "loss_breakdown/pointer_loss": 2.289440631866455, "step": 4900 }, { "epoch": 0.49967495634217535, "loss_breakdown/lm_loss": 3.3434644137742e-05, "loss_breakdown/pointer_loss": 0.49170589447021484, "step": 4900 }, { "epoch": 0.49967495634217535, "loss_breakdown/lm_loss": 1.4369296877703164e-05, "loss_breakdown/pointer_loss": 0.3240973949432373, "step": 4900 }, { "epoch": 0.49967495634217535, "loss_breakdown/lm_loss": 2.2273921786108986e-05, "loss_breakdown/pointer_loss": 0.4573790431022644, "step": 4900 }, { "epoch": 0.49967495634217535, "loss_breakdown/lm_loss": 1.2273579159227666e-05, "loss_breakdown/pointer_loss": 0.6822577714920044, "step": 4900 }, { "epoch": 0.49967495634217535, "loss_breakdown/lm_loss": 1.039454491547076e-05, "loss_breakdown/pointer_loss": 0.4605700671672821, "step": 4900 }, { "epoch": 0.49967495634217535, "loss_breakdown/lm_loss": 1.4163362720864825e-05, "loss_breakdown/pointer_loss": 0.7057986259460449, "step": 4900 }, { "epoch": 0.49967495634217535, "loss_breakdown/lm_loss": 8.667682777740993e-06, "loss_breakdown/pointer_loss": 0.4180041551589966, "step": 4900 }, { "epoch": 0.500694701151037, "grad_norm": 12.690014231441632, "learning_rate": 2.774504249291785e-06, "loss": 0.2851, "step": 4910 }, { "epoch": 0.500694701151037, "loss_breakdown/lm_loss": 1.1833149983431213e-05, "loss_breakdown/pointer_loss": 0.2934577465057373, "step": 4910 }, { "epoch": 0.500694701151037, "loss_breakdown/lm_loss": 1.648263423703611e-05, "loss_breakdown/pointer_loss": 0.4272373616695404, "step": 4910 }, { "epoch": 0.500694701151037, "loss_breakdown/lm_loss": 9.014009265229106e-06, "loss_breakdown/pointer_loss": 0.45983657240867615, "step": 4910 }, { "epoch": 0.500694701151037, "loss_breakdown/lm_loss": 5.890015472687082e-06, "loss_breakdown/pointer_loss": 0.14577367901802063, "step": 4910 }, { "epoch": 0.500694701151037, "loss_breakdown/lm_loss": 7.297496267710812e-06, "loss_breakdown/pointer_loss": 0.0905981957912445, "step": 4910 }, { "epoch": 0.500694701151037, "loss_breakdown/lm_loss": 8.320694178109989e-06, "loss_breakdown/pointer_loss": 0.23933298885822296, "step": 4910 }, { "epoch": 0.500694701151037, "loss_breakdown/lm_loss": 1.2217296898597851e-05, "loss_breakdown/pointer_loss": 1.4901113510131836, "step": 4910 }, { "epoch": 0.500694701151037, "loss_breakdown/lm_loss": 9.365718142362311e-06, "loss_breakdown/pointer_loss": 0.19783499836921692, "step": 4910 }, { "epoch": 0.5017144459598986, "grad_norm": 1.9078492218519634, "learning_rate": 2.7688385269121815e-06, "loss": 0.3097, "step": 4920 }, { "epoch": 0.5017144459598986, "loss_breakdown/lm_loss": 9.516520549368579e-06, "loss_breakdown/pointer_loss": 0.1664332151412964, "step": 4920 }, { "epoch": 0.5017144459598986, "loss_breakdown/lm_loss": 1.1587866538320668e-05, "loss_breakdown/pointer_loss": 0.18930783867835999, "step": 4920 }, { "epoch": 0.5017144459598986, "loss_breakdown/lm_loss": 9.769049029273447e-06, "loss_breakdown/pointer_loss": 0.4881044626235962, "step": 4920 }, { "epoch": 0.5017144459598986, "loss_breakdown/lm_loss": 1.1688853192026727e-05, "loss_breakdown/pointer_loss": 0.5727505087852478, "step": 4920 }, { "epoch": 0.5017144459598986, "loss_breakdown/lm_loss": 1.1613919923547655e-05, "loss_breakdown/pointer_loss": 0.4019415080547333, "step": 4920 }, { "epoch": 0.5017144459598986, "loss_breakdown/lm_loss": 7.128237029974116e-06, "loss_breakdown/pointer_loss": 0.3788214325904846, "step": 4920 }, { "epoch": 0.5017144459598986, "loss_breakdown/lm_loss": 6.671410119452048e-06, "loss_breakdown/pointer_loss": 0.47148412466049194, "step": 4920 }, { "epoch": 0.5017144459598986, "loss_breakdown/lm_loss": 1.6691465134499595e-05, "loss_breakdown/pointer_loss": 0.720037043094635, "step": 4920 }, { "epoch": 0.5027341907687601, "grad_norm": 5.299450184946241, "learning_rate": 2.763172804532578e-06, "loss": 0.268, "step": 4930 }, { "epoch": 0.5027341907687601, "loss_breakdown/lm_loss": 1.2818635696021374e-05, "loss_breakdown/pointer_loss": 0.42726051807403564, "step": 4930 }, { "epoch": 0.5027341907687601, "loss_breakdown/lm_loss": 8.602806701674126e-06, "loss_breakdown/pointer_loss": 0.13413745164871216, "step": 4930 }, { "epoch": 0.5027341907687601, "loss_breakdown/lm_loss": 1.1944104699068703e-05, "loss_breakdown/pointer_loss": 0.07444227486848831, "step": 4930 }, { "epoch": 0.5027341907687601, "loss_breakdown/lm_loss": 9.008056622406002e-06, "loss_breakdown/pointer_loss": 3.1050002574920654, "step": 4930 }, { "epoch": 0.5027341907687601, "loss_breakdown/lm_loss": 1.2242487173352856e-05, "loss_breakdown/pointer_loss": 0.060818687081336975, "step": 4930 }, { "epoch": 0.5027341907687601, "loss_breakdown/lm_loss": 1.636634442547802e-05, "loss_breakdown/pointer_loss": 0.4014762341976166, "step": 4930 }, { "epoch": 0.5027341907687601, "loss_breakdown/lm_loss": 1.1992133295279928e-05, "loss_breakdown/pointer_loss": 0.040969207882881165, "step": 4930 }, { "epoch": 0.5027341907687601, "loss_breakdown/lm_loss": 1.1217304745514411e-05, "loss_breakdown/pointer_loss": 0.16487930715084076, "step": 4930 }, { "epoch": 0.5037539355776217, "grad_norm": 3.5351097533928635, "learning_rate": 2.757507082152975e-06, "loss": 0.318, "step": 4940 }, { "epoch": 0.5037539355776217, "loss_breakdown/lm_loss": 7.90742979006609e-06, "loss_breakdown/pointer_loss": 0.18629057705402374, "step": 4940 }, { "epoch": 0.5037539355776217, "loss_breakdown/lm_loss": 1.662203976593446e-05, "loss_breakdown/pointer_loss": 0.3888307809829712, "step": 4940 }, { "epoch": 0.5037539355776217, "loss_breakdown/lm_loss": 1.0605421266518533e-05, "loss_breakdown/pointer_loss": 0.20555147528648376, "step": 4940 }, { "epoch": 0.5037539355776217, "loss_breakdown/lm_loss": 2.4452672732877545e-05, "loss_breakdown/pointer_loss": 0.525658130645752, "step": 4940 }, { "epoch": 0.5037539355776217, "loss_breakdown/lm_loss": 9.812607459025458e-06, "loss_breakdown/pointer_loss": 0.16068217158317566, "step": 4940 }, { "epoch": 0.5037539355776217, "loss_breakdown/lm_loss": 8.05708987172693e-06, "loss_breakdown/pointer_loss": 0.18408453464508057, "step": 4940 }, { "epoch": 0.5037539355776217, "loss_breakdown/lm_loss": 1.236553998751333e-05, "loss_breakdown/pointer_loss": 0.7346166372299194, "step": 4940 }, { "epoch": 0.5037539355776217, "loss_breakdown/lm_loss": 1.2475990843086038e-05, "loss_breakdown/pointer_loss": 0.30426210165023804, "step": 4940 }, { "epoch": 0.5047736803864833, "grad_norm": 5.232327187014364, "learning_rate": 2.751841359773371e-06, "loss": 0.2595, "step": 4950 }, { "epoch": 0.5047736803864833, "loss_breakdown/lm_loss": 7.473040750483051e-05, "loss_breakdown/pointer_loss": 1.7678542137145996, "step": 4950 }, { "epoch": 0.5047736803864833, "loss_breakdown/lm_loss": 2.6214378522126935e-05, "loss_breakdown/pointer_loss": 0.5001579523086548, "step": 4950 }, { "epoch": 0.5047736803864833, "loss_breakdown/lm_loss": 2.3771293854224496e-05, "loss_breakdown/pointer_loss": 1.6893725395202637, "step": 4950 }, { "epoch": 0.5047736803864833, "loss_breakdown/lm_loss": 2.725054400798399e-05, "loss_breakdown/pointer_loss": 0.8112548589706421, "step": 4950 }, { "epoch": 0.5047736803864833, "loss_breakdown/lm_loss": 2.038987986452412e-05, "loss_breakdown/pointer_loss": 0.27104827761650085, "step": 4950 }, { "epoch": 0.5047736803864833, "loss_breakdown/lm_loss": 1.6987409253488295e-05, "loss_breakdown/pointer_loss": 0.5826502442359924, "step": 4950 }, { "epoch": 0.5047736803864833, "loss_breakdown/lm_loss": 1.9890139810740948e-05, "loss_breakdown/pointer_loss": 0.8239246010780334, "step": 4950 }, { "epoch": 0.5047736803864833, "loss_breakdown/lm_loss": 2.867615876311902e-05, "loss_breakdown/pointer_loss": 0.7256220579147339, "step": 4950 }, { "epoch": 0.5057934251953449, "grad_norm": 4.400241183557449, "learning_rate": 2.746175637393768e-06, "loss": 0.269, "step": 4960 }, { "epoch": 0.5057934251953449, "loss_breakdown/lm_loss": 1.3188738194003236e-05, "loss_breakdown/pointer_loss": 0.3262926936149597, "step": 4960 }, { "epoch": 0.5057934251953449, "loss_breakdown/lm_loss": 6.601476343348622e-06, "loss_breakdown/pointer_loss": 0.09241007268428802, "step": 4960 }, { "epoch": 0.5057934251953449, "loss_breakdown/lm_loss": 7.875661140133161e-06, "loss_breakdown/pointer_loss": 0.07389435172080994, "step": 4960 }, { "epoch": 0.5057934251953449, "loss_breakdown/lm_loss": 9.518453225609846e-06, "loss_breakdown/pointer_loss": 0.08448317646980286, "step": 4960 }, { "epoch": 0.5057934251953449, "loss_breakdown/lm_loss": 8.168083695636597e-06, "loss_breakdown/pointer_loss": 0.6580522060394287, "step": 4960 }, { "epoch": 0.5057934251953449, "loss_breakdown/lm_loss": 5.718001830246067e-06, "loss_breakdown/pointer_loss": 1.2316418886184692, "step": 4960 }, { "epoch": 0.5057934251953449, "loss_breakdown/lm_loss": 1.5721787349320948e-05, "loss_breakdown/pointer_loss": 2.1901602745056152, "step": 4960 }, { "epoch": 0.5057934251953449, "loss_breakdown/lm_loss": 9.608093023416586e-06, "loss_breakdown/pointer_loss": 4.25914192199707, "step": 4960 }, { "epoch": 0.5068131700042064, "grad_norm": 2.6614423461325756, "learning_rate": 2.7405099150141644e-06, "loss": 0.2952, "step": 4970 }, { "epoch": 0.5068131700042064, "loss_breakdown/lm_loss": 1.1510865078889765e-05, "loss_breakdown/pointer_loss": 0.12834540009498596, "step": 4970 }, { "epoch": 0.5068131700042064, "loss_breakdown/lm_loss": 1.1370547326805536e-05, "loss_breakdown/pointer_loss": 0.5323264598846436, "step": 4970 }, { "epoch": 0.5068131700042064, "loss_breakdown/lm_loss": 1.216412965732161e-05, "loss_breakdown/pointer_loss": 0.5177774429321289, "step": 4970 }, { "epoch": 0.5068131700042064, "loss_breakdown/lm_loss": 9.45276133279549e-06, "loss_breakdown/pointer_loss": 0.21083706617355347, "step": 4970 }, { "epoch": 0.5068131700042064, "loss_breakdown/lm_loss": 1.1527378774189856e-05, "loss_breakdown/pointer_loss": 0.4192195534706116, "step": 4970 }, { "epoch": 0.5068131700042064, "loss_breakdown/lm_loss": 7.025832019280642e-06, "loss_breakdown/pointer_loss": 0.22631947696208954, "step": 4970 }, { "epoch": 0.5068131700042064, "loss_breakdown/lm_loss": 7.806399480614346e-06, "loss_breakdown/pointer_loss": 0.14782854914665222, "step": 4970 }, { "epoch": 0.5068131700042064, "loss_breakdown/lm_loss": 9.315784154750872e-06, "loss_breakdown/pointer_loss": 0.3057929277420044, "step": 4970 }, { "epoch": 0.507832914813068, "grad_norm": 7.360710043820399, "learning_rate": 2.734844192634561e-06, "loss": 0.2693, "step": 4980 }, { "epoch": 0.507832914813068, "loss_breakdown/lm_loss": 8.996172255137935e-06, "loss_breakdown/pointer_loss": 0.19315125048160553, "step": 4980 }, { "epoch": 0.507832914813068, "loss_breakdown/lm_loss": 7.108769750630017e-06, "loss_breakdown/pointer_loss": 0.5184527039527893, "step": 4980 }, { "epoch": 0.507832914813068, "loss_breakdown/lm_loss": 9.890190995065495e-06, "loss_breakdown/pointer_loss": 0.31544291973114014, "step": 4980 }, { "epoch": 0.507832914813068, "loss_breakdown/lm_loss": 8.499505383952055e-06, "loss_breakdown/pointer_loss": 0.25342094898223877, "step": 4980 }, { "epoch": 0.507832914813068, "loss_breakdown/lm_loss": 1.51669128172216e-05, "loss_breakdown/pointer_loss": 0.16761668026447296, "step": 4980 }, { "epoch": 0.507832914813068, "loss_breakdown/lm_loss": 6.921995463926578e-06, "loss_breakdown/pointer_loss": 0.3720575273036957, "step": 4980 }, { "epoch": 0.507832914813068, "loss_breakdown/lm_loss": 8.622682798886672e-06, "loss_breakdown/pointer_loss": 0.14248526096343994, "step": 4980 }, { "epoch": 0.507832914813068, "loss_breakdown/lm_loss": 1.8011789506999776e-05, "loss_breakdown/pointer_loss": 3.309410572052002, "step": 4980 }, { "epoch": 0.5088526596219296, "grad_norm": 3.612682310599933, "learning_rate": 2.729178470254958e-06, "loss": 0.312, "step": 4990 }, { "epoch": 0.5088526596219296, "loss_breakdown/lm_loss": 9.270886948797852e-06, "loss_breakdown/pointer_loss": 0.6198415756225586, "step": 4990 }, { "epoch": 0.5088526596219296, "loss_breakdown/lm_loss": 7.923973498691339e-06, "loss_breakdown/pointer_loss": 0.3669659495353699, "step": 4990 }, { "epoch": 0.5088526596219296, "loss_breakdown/lm_loss": 9.298118129663635e-06, "loss_breakdown/pointer_loss": 0.39830297231674194, "step": 4990 }, { "epoch": 0.5088526596219296, "loss_breakdown/lm_loss": 6.205446425155969e-06, "loss_breakdown/pointer_loss": 0.2860459089279175, "step": 4990 }, { "epoch": 0.5088526596219296, "loss_breakdown/lm_loss": 9.960507668438368e-06, "loss_breakdown/pointer_loss": 0.2950400710105896, "step": 4990 }, { "epoch": 0.5088526596219296, "loss_breakdown/lm_loss": 7.958624337334186e-06, "loss_breakdown/pointer_loss": 0.3876279592514038, "step": 4990 }, { "epoch": 0.5088526596219296, "loss_breakdown/lm_loss": 7.332027962547727e-06, "loss_breakdown/pointer_loss": 0.1870143562555313, "step": 4990 }, { "epoch": 0.5088526596219296, "loss_breakdown/lm_loss": 8.7870084826136e-06, "loss_breakdown/pointer_loss": 0.38867437839508057, "step": 4990 }, { "epoch": 0.5098724044307912, "grad_norm": 5.315672422182443, "learning_rate": 2.7235127478753544e-06, "loss": 0.2772, "step": 5000 }, { "epoch": 0.5098724044307912, "loss_breakdown/lm_loss": 2.4729255528654903e-05, "loss_breakdown/pointer_loss": 1.6679401397705078, "step": 5000 }, { "epoch": 0.5098724044307912, "loss_breakdown/lm_loss": 1.594411878613755e-05, "loss_breakdown/pointer_loss": 1.0787336826324463, "step": 5000 }, { "epoch": 0.5098724044307912, "loss_breakdown/lm_loss": 1.423929825250525e-05, "loss_breakdown/pointer_loss": 0.5574523210525513, "step": 5000 }, { "epoch": 0.5098724044307912, "loss_breakdown/lm_loss": 2.853759542631451e-05, "loss_breakdown/pointer_loss": 0.5686173439025879, "step": 5000 }, { "epoch": 0.5098724044307912, "loss_breakdown/lm_loss": 1.2048930329910945e-05, "loss_breakdown/pointer_loss": 1.1070729494094849, "step": 5000 }, { "epoch": 0.5098724044307912, "loss_breakdown/lm_loss": 8.081378837232478e-06, "loss_breakdown/pointer_loss": 0.6625477075576782, "step": 5000 }, { "epoch": 0.5098724044307912, "loss_breakdown/lm_loss": 1.0861728696909267e-05, "loss_breakdown/pointer_loss": 1.338449239730835, "step": 5000 }, { "epoch": 0.5098724044307912, "loss_breakdown/lm_loss": 8.902510671759956e-06, "loss_breakdown/pointer_loss": 0.2920094132423401, "step": 5000 }, { "epoch": 0.5108921492396528, "grad_norm": 4.7297506801860685, "learning_rate": 2.7178470254957513e-06, "loss": 0.2687, "step": 5010 }, { "epoch": 0.5108921492396528, "loss_breakdown/lm_loss": 8.908769814297557e-06, "loss_breakdown/pointer_loss": 0.13965126872062683, "step": 5010 }, { "epoch": 0.5108921492396528, "loss_breakdown/lm_loss": 7.733870916126762e-06, "loss_breakdown/pointer_loss": 0.2099916636943817, "step": 5010 }, { "epoch": 0.5108921492396528, "loss_breakdown/lm_loss": 9.859169040282723e-06, "loss_breakdown/pointer_loss": 0.21675501763820648, "step": 5010 }, { "epoch": 0.5108921492396528, "loss_breakdown/lm_loss": 1.0523986929911189e-05, "loss_breakdown/pointer_loss": 0.31493979692459106, "step": 5010 }, { "epoch": 0.5108921492396528, "loss_breakdown/lm_loss": 9.178970685752574e-06, "loss_breakdown/pointer_loss": 0.20773833990097046, "step": 5010 }, { "epoch": 0.5108921492396528, "loss_breakdown/lm_loss": 9.141895134234801e-06, "loss_breakdown/pointer_loss": 1.1570103168487549, "step": 5010 }, { "epoch": 0.5108921492396528, "loss_breakdown/lm_loss": 8.447835170954932e-06, "loss_breakdown/pointer_loss": 0.08431626856327057, "step": 5010 }, { "epoch": 0.5108921492396528, "loss_breakdown/lm_loss": 1.2949773008585908e-05, "loss_breakdown/pointer_loss": 3.1659600734710693, "step": 5010 }, { "epoch": 0.5119118940485143, "grad_norm": 3.55703856594504, "learning_rate": 2.7121813031161474e-06, "loss": 0.3093, "step": 5020 }, { "epoch": 0.5119118940485143, "loss_breakdown/lm_loss": 9.48598244576715e-06, "loss_breakdown/pointer_loss": 0.19090376794338226, "step": 5020 }, { "epoch": 0.5119118940485143, "loss_breakdown/lm_loss": 1.1863791769428644e-05, "loss_breakdown/pointer_loss": 1.3728941679000854, "step": 5020 }, { "epoch": 0.5119118940485143, "loss_breakdown/lm_loss": 6.924149147380376e-06, "loss_breakdown/pointer_loss": 0.6532114744186401, "step": 5020 }, { "epoch": 0.5119118940485143, "loss_breakdown/lm_loss": 7.457963874912821e-06, "loss_breakdown/pointer_loss": 0.7476599812507629, "step": 5020 }, { "epoch": 0.5119118940485143, "loss_breakdown/lm_loss": 8.899271961126942e-06, "loss_breakdown/pointer_loss": 0.29186761379241943, "step": 5020 }, { "epoch": 0.5119118940485143, "loss_breakdown/lm_loss": 9.149909601546824e-06, "loss_breakdown/pointer_loss": 0.5136023759841919, "step": 5020 }, { "epoch": 0.5119118940485143, "loss_breakdown/lm_loss": 8.511741725669708e-06, "loss_breakdown/pointer_loss": 1.8059959411621094, "step": 5020 }, { "epoch": 0.5119118940485143, "loss_breakdown/lm_loss": 1.5615612937835976e-05, "loss_breakdown/pointer_loss": 1.1107099056243896, "step": 5020 }, { "epoch": 0.5129316388573759, "grad_norm": 5.80338666482927, "learning_rate": 2.706515580736544e-06, "loss": 0.2652, "step": 5030 }, { "epoch": 0.5129316388573759, "loss_breakdown/lm_loss": 1.1014518349838909e-05, "loss_breakdown/pointer_loss": 0.12276581674814224, "step": 5030 }, { "epoch": 0.5129316388573759, "loss_breakdown/lm_loss": 1.0196193215961102e-05, "loss_breakdown/pointer_loss": 1.270387887954712, "step": 5030 }, { "epoch": 0.5129316388573759, "loss_breakdown/lm_loss": 1.1133714906463865e-05, "loss_breakdown/pointer_loss": 0.10945279896259308, "step": 5030 }, { "epoch": 0.5129316388573759, "loss_breakdown/lm_loss": 9.8981445262325e-06, "loss_breakdown/pointer_loss": 1.6819204092025757, "step": 5030 }, { "epoch": 0.5129316388573759, "loss_breakdown/lm_loss": 8.753778274694923e-06, "loss_breakdown/pointer_loss": 3.459946393966675, "step": 5030 }, { "epoch": 0.5129316388573759, "loss_breakdown/lm_loss": 7.4663721534307115e-06, "loss_breakdown/pointer_loss": 0.07055103033781052, "step": 5030 }, { "epoch": 0.5129316388573759, "loss_breakdown/lm_loss": 2.4508095521014184e-05, "loss_breakdown/pointer_loss": 0.310589075088501, "step": 5030 }, { "epoch": 0.5129316388573759, "loss_breakdown/lm_loss": 2.505351949366741e-05, "loss_breakdown/pointer_loss": 7.996910095214844, "step": 5030 }, { "epoch": 0.5139513836662375, "grad_norm": 3.607198960133821, "learning_rate": 2.700849858356941e-06, "loss": 0.3325, "step": 5040 }, { "epoch": 0.5139513836662375, "loss_breakdown/lm_loss": 1.1568263289518654e-05, "loss_breakdown/pointer_loss": 0.5965436100959778, "step": 5040 }, { "epoch": 0.5139513836662375, "loss_breakdown/lm_loss": 1.3218027561379131e-05, "loss_breakdown/pointer_loss": 0.7232266068458557, "step": 5040 }, { "epoch": 0.5139513836662375, "loss_breakdown/lm_loss": 1.005472950055264e-05, "loss_breakdown/pointer_loss": 1.5029404163360596, "step": 5040 }, { "epoch": 0.5139513836662375, "loss_breakdown/lm_loss": 9.18280602490995e-06, "loss_breakdown/pointer_loss": 0.17485162615776062, "step": 5040 }, { "epoch": 0.5139513836662375, "loss_breakdown/lm_loss": 8.627947863715235e-06, "loss_breakdown/pointer_loss": 0.46522650122642517, "step": 5040 }, { "epoch": 0.5139513836662375, "loss_breakdown/lm_loss": 1.0022146852861624e-05, "loss_breakdown/pointer_loss": 0.21688315272331238, "step": 5040 }, { "epoch": 0.5139513836662375, "loss_breakdown/lm_loss": 1.1002219252986833e-05, "loss_breakdown/pointer_loss": 0.15718737244606018, "step": 5040 }, { "epoch": 0.5139513836662375, "loss_breakdown/lm_loss": 1.1999976777588017e-05, "loss_breakdown/pointer_loss": 0.11467176675796509, "step": 5040 }, { "epoch": 0.5149711284750991, "grad_norm": 10.342385682125693, "learning_rate": 2.6951841359773373e-06, "loss": 0.2973, "step": 5050 }, { "epoch": 0.5149711284750991, "loss_breakdown/lm_loss": 6.581676279893145e-05, "loss_breakdown/pointer_loss": 2.1930203437805176, "step": 5050 }, { "epoch": 0.5149711284750991, "loss_breakdown/lm_loss": 4.502044612308964e-05, "loss_breakdown/pointer_loss": 0.5453461408615112, "step": 5050 }, { "epoch": 0.5149711284750991, "loss_breakdown/lm_loss": 3.0879193218424916e-05, "loss_breakdown/pointer_loss": 1.0171512365341187, "step": 5050 }, { "epoch": 0.5149711284750991, "loss_breakdown/lm_loss": 2.0665127522079274e-05, "loss_breakdown/pointer_loss": 0.728790819644928, "step": 5050 }, { "epoch": 0.5149711284750991, "loss_breakdown/lm_loss": 1.1454945706645958e-05, "loss_breakdown/pointer_loss": 0.6377679109573364, "step": 5050 }, { "epoch": 0.5149711284750991, "loss_breakdown/lm_loss": 1.0326165465812664e-05, "loss_breakdown/pointer_loss": 0.20724141597747803, "step": 5050 }, { "epoch": 0.5149711284750991, "loss_breakdown/lm_loss": 2.4309938453370705e-05, "loss_breakdown/pointer_loss": 0.8870880007743835, "step": 5050 }, { "epoch": 0.5149711284750991, "loss_breakdown/lm_loss": 1.5275329133146442e-05, "loss_breakdown/pointer_loss": 1.1758382320404053, "step": 5050 }, { "epoch": 0.5159908732839606, "grad_norm": 4.103220423068021, "learning_rate": 2.689518413597734e-06, "loss": 0.2841, "step": 5060 }, { "epoch": 0.5159908732839606, "loss_breakdown/lm_loss": 9.463008609600365e-06, "loss_breakdown/pointer_loss": 0.0771714597940445, "step": 5060 }, { "epoch": 0.5159908732839606, "loss_breakdown/lm_loss": 7.939201168483123e-06, "loss_breakdown/pointer_loss": 0.2217184454202652, "step": 5060 }, { "epoch": 0.5159908732839606, "loss_breakdown/lm_loss": 5.99616214458365e-06, "loss_breakdown/pointer_loss": 0.24947760999202728, "step": 5060 }, { "epoch": 0.5159908732839606, "loss_breakdown/lm_loss": 7.209392151708016e-06, "loss_breakdown/pointer_loss": 0.11206173151731491, "step": 5060 }, { "epoch": 0.5159908732839606, "loss_breakdown/lm_loss": 8.259764399554115e-06, "loss_breakdown/pointer_loss": 0.1827019900083542, "step": 5060 }, { "epoch": 0.5159908732839606, "loss_breakdown/lm_loss": 7.957105481182225e-06, "loss_breakdown/pointer_loss": 0.4490249752998352, "step": 5060 }, { "epoch": 0.5159908732839606, "loss_breakdown/lm_loss": 8.630619049654342e-06, "loss_breakdown/pointer_loss": 0.6680179834365845, "step": 5060 }, { "epoch": 0.5159908732839606, "loss_breakdown/lm_loss": 7.347168775595492e-06, "loss_breakdown/pointer_loss": 0.12978272140026093, "step": 5060 }, { "epoch": 0.5170106180928222, "grad_norm": 7.767751920246243, "learning_rate": 2.6838526912181307e-06, "loss": 0.2782, "step": 5070 }, { "epoch": 0.5170106180928222, "loss_breakdown/lm_loss": 1.0446306077938061e-05, "loss_breakdown/pointer_loss": 0.2655983865261078, "step": 5070 }, { "epoch": 0.5170106180928222, "loss_breakdown/lm_loss": 1.1852291208924726e-05, "loss_breakdown/pointer_loss": 0.3450445234775543, "step": 5070 }, { "epoch": 0.5170106180928222, "loss_breakdown/lm_loss": 7.381327577604679e-06, "loss_breakdown/pointer_loss": 0.27392467856407166, "step": 5070 }, { "epoch": 0.5170106180928222, "loss_breakdown/lm_loss": 1.1411964806029573e-05, "loss_breakdown/pointer_loss": 0.17352499067783356, "step": 5070 }, { "epoch": 0.5170106180928222, "loss_breakdown/lm_loss": 1.0214185749646276e-05, "loss_breakdown/pointer_loss": 0.20910556614398956, "step": 5070 }, { "epoch": 0.5170106180928222, "loss_breakdown/lm_loss": 9.86278973869048e-06, "loss_breakdown/pointer_loss": 0.7930718660354614, "step": 5070 }, { "epoch": 0.5170106180928222, "loss_breakdown/lm_loss": 1.00648148873006e-05, "loss_breakdown/pointer_loss": 0.3001147210597992, "step": 5070 }, { "epoch": 0.5170106180928222, "loss_breakdown/lm_loss": 7.570814432256157e-06, "loss_breakdown/pointer_loss": 0.7047752141952515, "step": 5070 }, { "epoch": 0.5180303629016838, "grad_norm": 150.53064692107372, "learning_rate": 2.678186968838527e-06, "loss": 0.283, "step": 5080 }, { "epoch": 0.5180303629016838, "loss_breakdown/lm_loss": 1.0891433703363873e-05, "loss_breakdown/pointer_loss": 0.16921716928482056, "step": 5080 }, { "epoch": 0.5180303629016838, "loss_breakdown/lm_loss": 3.061235474888235e-05, "loss_breakdown/pointer_loss": 1.6787599325180054, "step": 5080 }, { "epoch": 0.5180303629016838, "loss_breakdown/lm_loss": 9.465025868848898e-06, "loss_breakdown/pointer_loss": 0.2328263819217682, "step": 5080 }, { "epoch": 0.5180303629016838, "loss_breakdown/lm_loss": 1.0934895726677496e-05, "loss_breakdown/pointer_loss": 0.14886188507080078, "step": 5080 }, { "epoch": 0.5180303629016838, "loss_breakdown/lm_loss": 1.273509315069532e-05, "loss_breakdown/pointer_loss": 3.3006885051727295, "step": 5080 }, { "epoch": 0.5180303629016838, "loss_breakdown/lm_loss": 9.842396138992626e-06, "loss_breakdown/pointer_loss": 0.14770857989788055, "step": 5080 }, { "epoch": 0.5180303629016838, "loss_breakdown/lm_loss": 1.4888679288560525e-05, "loss_breakdown/pointer_loss": 0.22879472374916077, "step": 5080 }, { "epoch": 0.5180303629016838, "loss_breakdown/lm_loss": 1.3064679478702601e-05, "loss_breakdown/pointer_loss": 3.7981698513031006, "step": 5080 }, { "epoch": 0.5190501077105454, "grad_norm": 2.841269668021824, "learning_rate": 2.672521246458924e-06, "loss": 0.2963, "step": 5090 }, { "epoch": 0.5190501077105454, "loss_breakdown/lm_loss": 6.564332579728216e-06, "loss_breakdown/pointer_loss": 0.5218758583068848, "step": 5090 }, { "epoch": 0.5190501077105454, "loss_breakdown/lm_loss": 6.161066266940907e-06, "loss_breakdown/pointer_loss": 0.4921194016933441, "step": 5090 }, { "epoch": 0.5190501077105454, "loss_breakdown/lm_loss": 1.081602295016637e-05, "loss_breakdown/pointer_loss": 0.39507973194122314, "step": 5090 }, { "epoch": 0.5190501077105454, "loss_breakdown/lm_loss": 7.783309229125734e-06, "loss_breakdown/pointer_loss": 0.3789042532444, "step": 5090 }, { "epoch": 0.5190501077105454, "loss_breakdown/lm_loss": 3.8976562791503966e-05, "loss_breakdown/pointer_loss": 0.164875790476799, "step": 5090 }, { "epoch": 0.5190501077105454, "loss_breakdown/lm_loss": 7.515702236560173e-06, "loss_breakdown/pointer_loss": 0.23416933417320251, "step": 5090 }, { "epoch": 0.5190501077105454, "loss_breakdown/lm_loss": 1.4878920410410501e-05, "loss_breakdown/pointer_loss": 0.35867440700531006, "step": 5090 }, { "epoch": 0.5190501077105454, "loss_breakdown/lm_loss": 1.2296628483454697e-05, "loss_breakdown/pointer_loss": 0.6833188533782959, "step": 5090 }, { "epoch": 0.5200698525194071, "grad_norm": 7.028257252346691, "learning_rate": 2.6668555240793202e-06, "loss": 0.263, "step": 5100 }, { "epoch": 0.5200698525194071, "loss_breakdown/lm_loss": 5.4097665270091966e-05, "loss_breakdown/pointer_loss": 2.763943910598755, "step": 5100 }, { "epoch": 0.5200698525194071, "loss_breakdown/lm_loss": 1.4517484487441834e-05, "loss_breakdown/pointer_loss": 0.7463027238845825, "step": 5100 }, { "epoch": 0.5200698525194071, "loss_breakdown/lm_loss": 4.031868593301624e-05, "loss_breakdown/pointer_loss": 1.0458906888961792, "step": 5100 }, { "epoch": 0.5200698525194071, "loss_breakdown/lm_loss": 5.925322693656199e-05, "loss_breakdown/pointer_loss": 0.43377429246902466, "step": 5100 }, { "epoch": 0.5200698525194071, "loss_breakdown/lm_loss": 1.2853289263148326e-05, "loss_breakdown/pointer_loss": 1.0104345083236694, "step": 5100 }, { "epoch": 0.5200698525194071, "loss_breakdown/lm_loss": 0.00010293228115187958, "loss_breakdown/pointer_loss": 0.7577165365219116, "step": 5100 }, { "epoch": 0.5200698525194071, "loss_breakdown/lm_loss": 1.1848816939163953e-05, "loss_breakdown/pointer_loss": 1.3994529247283936, "step": 5100 }, { "epoch": 0.5200698525194071, "loss_breakdown/lm_loss": 8.904356946004555e-06, "loss_breakdown/pointer_loss": 0.5929520726203918, "step": 5100 }, { "epoch": 0.5210895973282686, "grad_norm": 5.325347866450959, "learning_rate": 2.6611898016997168e-06, "loss": 0.2844, "step": 5110 }, { "epoch": 0.5210895973282686, "loss_breakdown/lm_loss": 7.258040568558499e-06, "loss_breakdown/pointer_loss": 0.555271565914154, "step": 5110 }, { "epoch": 0.5210895973282686, "loss_breakdown/lm_loss": 7.05620004737284e-06, "loss_breakdown/pointer_loss": 0.19080708920955658, "step": 5110 }, { "epoch": 0.5210895973282686, "loss_breakdown/lm_loss": 9.428629709873348e-06, "loss_breakdown/pointer_loss": 0.962241530418396, "step": 5110 }, { "epoch": 0.5210895973282686, "loss_breakdown/lm_loss": 8.398845238843933e-06, "loss_breakdown/pointer_loss": 0.392971932888031, "step": 5110 }, { "epoch": 0.5210895973282686, "loss_breakdown/lm_loss": 2.469046739861369e-05, "loss_breakdown/pointer_loss": 0.14858077466487885, "step": 5110 }, { "epoch": 0.5210895973282686, "loss_breakdown/lm_loss": 4.644408545573242e-05, "loss_breakdown/pointer_loss": 0.1486055999994278, "step": 5110 }, { "epoch": 0.5210895973282686, "loss_breakdown/lm_loss": 1.5110405911400449e-05, "loss_breakdown/pointer_loss": 0.6318323016166687, "step": 5110 }, { "epoch": 0.5210895973282686, "loss_breakdown/lm_loss": 7.911389729997609e-06, "loss_breakdown/pointer_loss": 0.10891133546829224, "step": 5110 }, { "epoch": 0.5221093421371302, "grad_norm": 2.9102565481958895, "learning_rate": 2.6555240793201137e-06, "loss": 0.3029, "step": 5120 }, { "epoch": 0.5221093421371302, "loss_breakdown/lm_loss": 1.2751548638334498e-05, "loss_breakdown/pointer_loss": 0.6016132235527039, "step": 5120 }, { "epoch": 0.5221093421371302, "loss_breakdown/lm_loss": 7.112438652256969e-06, "loss_breakdown/pointer_loss": 0.38799914717674255, "step": 5120 }, { "epoch": 0.5221093421371302, "loss_breakdown/lm_loss": 6.045171176083386e-06, "loss_breakdown/pointer_loss": 0.2725270390510559, "step": 5120 }, { "epoch": 0.5221093421371302, "loss_breakdown/lm_loss": 1.08561025626841e-05, "loss_breakdown/pointer_loss": 0.33447539806365967, "step": 5120 }, { "epoch": 0.5221093421371302, "loss_breakdown/lm_loss": 7.628201728948625e-06, "loss_breakdown/pointer_loss": 0.6800525784492493, "step": 5120 }, { "epoch": 0.5221093421371302, "loss_breakdown/lm_loss": 7.690550773986615e-06, "loss_breakdown/pointer_loss": 0.4018130302429199, "step": 5120 }, { "epoch": 0.5221093421371302, "loss_breakdown/lm_loss": 1.228328710567439e-05, "loss_breakdown/pointer_loss": 0.19204729795455933, "step": 5120 }, { "epoch": 0.5221093421371302, "loss_breakdown/lm_loss": 7.429704965034034e-06, "loss_breakdown/pointer_loss": 0.22718144953250885, "step": 5120 }, { "epoch": 0.5231290869459918, "grad_norm": 16.538910661799473, "learning_rate": 2.64985835694051e-06, "loss": 0.2705, "step": 5130 }, { "epoch": 0.5231290869459918, "loss_breakdown/lm_loss": 8.718023309484124e-06, "loss_breakdown/pointer_loss": 0.7688292264938354, "step": 5130 }, { "epoch": 0.5231290869459918, "loss_breakdown/lm_loss": 7.394832664431306e-06, "loss_breakdown/pointer_loss": 0.1886139214038849, "step": 5130 }, { "epoch": 0.5231290869459918, "loss_breakdown/lm_loss": 1.0629234566295054e-05, "loss_breakdown/pointer_loss": 0.6628406047821045, "step": 5130 }, { "epoch": 0.5231290869459918, "loss_breakdown/lm_loss": 1.0545818440732546e-05, "loss_breakdown/pointer_loss": 1.105581521987915, "step": 5130 }, { "epoch": 0.5231290869459918, "loss_breakdown/lm_loss": 1.1058305972255766e-05, "loss_breakdown/pointer_loss": 0.23537980020046234, "step": 5130 }, { "epoch": 0.5231290869459918, "loss_breakdown/lm_loss": 7.255773198266979e-06, "loss_breakdown/pointer_loss": 0.13807137310504913, "step": 5130 }, { "epoch": 0.5231290869459918, "loss_breakdown/lm_loss": 8.650475137983449e-06, "loss_breakdown/pointer_loss": 0.13005122542381287, "step": 5130 }, { "epoch": 0.5231290869459918, "loss_breakdown/lm_loss": 1.953323408088181e-05, "loss_breakdown/pointer_loss": 0.08241643011569977, "step": 5130 }, { "epoch": 0.5241488317548534, "grad_norm": 3.5795128388724637, "learning_rate": 2.6441926345609063e-06, "loss": 0.2912, "step": 5140 }, { "epoch": 0.5241488317548534, "loss_breakdown/lm_loss": 7.946609002829064e-06, "loss_breakdown/pointer_loss": 0.33549416065216064, "step": 5140 }, { "epoch": 0.5241488317548534, "loss_breakdown/lm_loss": 6.805307293689111e-06, "loss_breakdown/pointer_loss": 0.3074735403060913, "step": 5140 }, { "epoch": 0.5241488317548534, "loss_breakdown/lm_loss": 7.12861856300151e-06, "loss_breakdown/pointer_loss": 0.18978211283683777, "step": 5140 }, { "epoch": 0.5241488317548534, "loss_breakdown/lm_loss": 7.624199042766122e-06, "loss_breakdown/pointer_loss": 0.25164955854415894, "step": 5140 }, { "epoch": 0.5241488317548534, "loss_breakdown/lm_loss": 6.530767223011935e-06, "loss_breakdown/pointer_loss": 0.6248294115066528, "step": 5140 }, { "epoch": 0.5241488317548534, "loss_breakdown/lm_loss": 1.3059127013548277e-05, "loss_breakdown/pointer_loss": 0.16558106243610382, "step": 5140 }, { "epoch": 0.5241488317548534, "loss_breakdown/lm_loss": 7.54633629185264e-06, "loss_breakdown/pointer_loss": 0.6892581582069397, "step": 5140 }, { "epoch": 0.5241488317548534, "loss_breakdown/lm_loss": 1.0111075425811578e-05, "loss_breakdown/pointer_loss": 1.2816309928894043, "step": 5140 }, { "epoch": 0.525168576563715, "grad_norm": 16.074403907216357, "learning_rate": 2.6385269121813036e-06, "loss": 0.3088, "step": 5150 }, { "epoch": 0.525168576563715, "loss_breakdown/lm_loss": 4.181983968010172e-05, "loss_breakdown/pointer_loss": 2.395960569381714, "step": 5150 }, { "epoch": 0.525168576563715, "loss_breakdown/lm_loss": 1.301934571529273e-05, "loss_breakdown/pointer_loss": 1.0688414573669434, "step": 5150 }, { "epoch": 0.525168576563715, "loss_breakdown/lm_loss": 1.0058749467134476e-05, "loss_breakdown/pointer_loss": 0.9594219923019409, "step": 5150 }, { "epoch": 0.525168576563715, "loss_breakdown/lm_loss": 1.2167025488452055e-05, "loss_breakdown/pointer_loss": 0.8345457315444946, "step": 5150 }, { "epoch": 0.525168576563715, "loss_breakdown/lm_loss": 1.3996082998346537e-05, "loss_breakdown/pointer_loss": 0.38172101974487305, "step": 5150 }, { "epoch": 0.525168576563715, "loss_breakdown/lm_loss": 9.277204298996367e-06, "loss_breakdown/pointer_loss": 0.784459114074707, "step": 5150 }, { "epoch": 0.525168576563715, "loss_breakdown/lm_loss": 7.88908528193133e-06, "loss_breakdown/pointer_loss": 0.7027410268783569, "step": 5150 }, { "epoch": 0.525168576563715, "loss_breakdown/lm_loss": 1.2093501936760731e-05, "loss_breakdown/pointer_loss": 0.4575785994529724, "step": 5150 }, { "epoch": 0.5261883213725765, "grad_norm": 6.890592324246721, "learning_rate": 2.6328611898016997e-06, "loss": 0.2814, "step": 5160 }, { "epoch": 0.5261883213725765, "loss_breakdown/lm_loss": 4.514025476964889e-06, "loss_breakdown/pointer_loss": 0.13368947803974152, "step": 5160 }, { "epoch": 0.5261883213725765, "loss_breakdown/lm_loss": 9.507342838332988e-06, "loss_breakdown/pointer_loss": 0.7576951384544373, "step": 5160 }, { "epoch": 0.5261883213725765, "loss_breakdown/lm_loss": 7.541855666204356e-06, "loss_breakdown/pointer_loss": 0.22767364978790283, "step": 5160 }, { "epoch": 0.5261883213725765, "loss_breakdown/lm_loss": 1.2248452549101785e-05, "loss_breakdown/pointer_loss": 0.7780714631080627, "step": 5160 }, { "epoch": 0.5261883213725765, "loss_breakdown/lm_loss": 1.3126091289450414e-05, "loss_breakdown/pointer_loss": 0.14754244685173035, "step": 5160 }, { "epoch": 0.5261883213725765, "loss_breakdown/lm_loss": 1.0883500181080308e-05, "loss_breakdown/pointer_loss": 0.31261059641838074, "step": 5160 }, { "epoch": 0.5261883213725765, "loss_breakdown/lm_loss": 7.724642273387872e-06, "loss_breakdown/pointer_loss": 0.196444571018219, "step": 5160 }, { "epoch": 0.5261883213725765, "loss_breakdown/lm_loss": 7.13654435458011e-06, "loss_breakdown/pointer_loss": 0.042605943977832794, "step": 5160 }, { "epoch": 0.5272080661814381, "grad_norm": 3.4111567686872095, "learning_rate": 2.6271954674220966e-06, "loss": 0.3014, "step": 5170 }, { "epoch": 0.5272080661814381, "loss_breakdown/lm_loss": 7.595935585413827e-06, "loss_breakdown/pointer_loss": 0.4913257360458374, "step": 5170 }, { "epoch": 0.5272080661814381, "loss_breakdown/lm_loss": 8.037393854465336e-06, "loss_breakdown/pointer_loss": 0.14802806079387665, "step": 5170 }, { "epoch": 0.5272080661814381, "loss_breakdown/lm_loss": 9.206956747220829e-06, "loss_breakdown/pointer_loss": 0.5410031080245972, "step": 5170 }, { "epoch": 0.5272080661814381, "loss_breakdown/lm_loss": 1.0623190064507071e-05, "loss_breakdown/pointer_loss": 0.42318961024284363, "step": 5170 }, { "epoch": 0.5272080661814381, "loss_breakdown/lm_loss": 1.1319033546897117e-05, "loss_breakdown/pointer_loss": 0.24194589257240295, "step": 5170 }, { "epoch": 0.5272080661814381, "loss_breakdown/lm_loss": 7.633695531694684e-06, "loss_breakdown/pointer_loss": 0.6261643171310425, "step": 5170 }, { "epoch": 0.5272080661814381, "loss_breakdown/lm_loss": 2.433195913909003e-05, "loss_breakdown/pointer_loss": 0.2809935212135315, "step": 5170 }, { "epoch": 0.5272080661814381, "loss_breakdown/lm_loss": 1.0974022188747767e-05, "loss_breakdown/pointer_loss": 0.19097645580768585, "step": 5170 }, { "epoch": 0.5282278109902997, "grad_norm": 10.143618795470621, "learning_rate": 2.621529745042493e-06, "loss": 0.2577, "step": 5180 }, { "epoch": 0.5282278109902997, "loss_breakdown/lm_loss": 2.649887937877793e-05, "loss_breakdown/pointer_loss": 0.149505615234375, "step": 5180 }, { "epoch": 0.5282278109902997, "loss_breakdown/lm_loss": 8.849144251144025e-06, "loss_breakdown/pointer_loss": 0.4337233304977417, "step": 5180 }, { "epoch": 0.5282278109902997, "loss_breakdown/lm_loss": 1.1181603440491017e-05, "loss_breakdown/pointer_loss": 2.501638650894165, "step": 5180 }, { "epoch": 0.5282278109902997, "loss_breakdown/lm_loss": 1.2203947335365228e-05, "loss_breakdown/pointer_loss": 0.1809210479259491, "step": 5180 }, { "epoch": 0.5282278109902997, "loss_breakdown/lm_loss": 7.216044650704134e-06, "loss_breakdown/pointer_loss": 0.658276379108429, "step": 5180 }, { "epoch": 0.5282278109902997, "loss_breakdown/lm_loss": 1.20132544907392e-05, "loss_breakdown/pointer_loss": 0.2618481516838074, "step": 5180 }, { "epoch": 0.5282278109902997, "loss_breakdown/lm_loss": 9.544507520331535e-06, "loss_breakdown/pointer_loss": 0.20194947719573975, "step": 5180 }, { "epoch": 0.5282278109902997, "loss_breakdown/lm_loss": 1.2480738405429292e-05, "loss_breakdown/pointer_loss": 0.1539977788925171, "step": 5180 }, { "epoch": 0.5292475557991613, "grad_norm": 2.6339830843099308, "learning_rate": 2.6158640226628896e-06, "loss": 0.3032, "step": 5190 }, { "epoch": 0.5292475557991613, "loss_breakdown/lm_loss": 8.125258318614215e-06, "loss_breakdown/pointer_loss": 0.1279754936695099, "step": 5190 }, { "epoch": 0.5292475557991613, "loss_breakdown/lm_loss": 8.240787792601623e-06, "loss_breakdown/pointer_loss": 0.3748979866504669, "step": 5190 }, { "epoch": 0.5292475557991613, "loss_breakdown/lm_loss": 9.759075510373805e-06, "loss_breakdown/pointer_loss": 0.17746767401695251, "step": 5190 }, { "epoch": 0.5292475557991613, "loss_breakdown/lm_loss": 5.9654512369888835e-06, "loss_breakdown/pointer_loss": 0.2737867534160614, "step": 5190 }, { "epoch": 0.5292475557991613, "loss_breakdown/lm_loss": 7.919605195638724e-06, "loss_breakdown/pointer_loss": 0.20475822687149048, "step": 5190 }, { "epoch": 0.5292475557991613, "loss_breakdown/lm_loss": 7.150571946112905e-06, "loss_breakdown/pointer_loss": 0.4458601474761963, "step": 5190 }, { "epoch": 0.5292475557991613, "loss_breakdown/lm_loss": 1.0476882380316965e-05, "loss_breakdown/pointer_loss": 0.09199561178684235, "step": 5190 }, { "epoch": 0.5292475557991613, "loss_breakdown/lm_loss": 5.900796622881899e-06, "loss_breakdown/pointer_loss": 0.06327581405639648, "step": 5190 }, { "epoch": 0.5302673006080229, "grad_norm": 11.638460941472536, "learning_rate": 2.6101983002832866e-06, "loss": 0.2731, "step": 5200 }, { "epoch": 0.5302673006080229, "loss_breakdown/lm_loss": 0.00016188729205168784, "loss_breakdown/pointer_loss": 2.8141331672668457, "step": 5200 }, { "epoch": 0.5302673006080229, "loss_breakdown/lm_loss": 2.1295252736308612e-05, "loss_breakdown/pointer_loss": 0.7310286164283752, "step": 5200 }, { "epoch": 0.5302673006080229, "loss_breakdown/lm_loss": 1.0303275303158443e-05, "loss_breakdown/pointer_loss": 0.32085415720939636, "step": 5200 }, { "epoch": 0.5302673006080229, "loss_breakdown/lm_loss": 1.2871002581960056e-05, "loss_breakdown/pointer_loss": 0.6269726753234863, "step": 5200 }, { "epoch": 0.5302673006080229, "loss_breakdown/lm_loss": 9.029401553561911e-06, "loss_breakdown/pointer_loss": 0.27776455879211426, "step": 5200 }, { "epoch": 0.5302673006080229, "loss_breakdown/lm_loss": 1.375036754325265e-05, "loss_breakdown/pointer_loss": 0.7312264442443848, "step": 5200 }, { "epoch": 0.5302673006080229, "loss_breakdown/lm_loss": 8.749706466915086e-06, "loss_breakdown/pointer_loss": 0.4893213212490082, "step": 5200 }, { "epoch": 0.5302673006080229, "loss_breakdown/lm_loss": 8.801805051916745e-06, "loss_breakdown/pointer_loss": 0.35950183868408203, "step": 5200 }, { "epoch": 0.5312870454168844, "grad_norm": 3.3927314330532217, "learning_rate": 2.604532577903683e-06, "loss": 0.2652, "step": 5210 }, { "epoch": 0.5312870454168844, "loss_breakdown/lm_loss": 6.961665349081159e-06, "loss_breakdown/pointer_loss": 0.1771649718284607, "step": 5210 }, { "epoch": 0.5312870454168844, "loss_breakdown/lm_loss": 8.445806088275276e-06, "loss_breakdown/pointer_loss": 0.2775949239730835, "step": 5210 }, { "epoch": 0.5312870454168844, "loss_breakdown/lm_loss": 8.36747949506389e-06, "loss_breakdown/pointer_loss": 0.08880777657032013, "step": 5210 }, { "epoch": 0.5312870454168844, "loss_breakdown/lm_loss": 6.585940809600288e-06, "loss_breakdown/pointer_loss": 0.17213687300682068, "step": 5210 }, { "epoch": 0.5312870454168844, "loss_breakdown/lm_loss": 1.0159547855437268e-05, "loss_breakdown/pointer_loss": 0.08463598787784576, "step": 5210 }, { "epoch": 0.5312870454168844, "loss_breakdown/lm_loss": 6.48689865556662e-06, "loss_breakdown/pointer_loss": 0.18819627165794373, "step": 5210 }, { "epoch": 0.5312870454168844, "loss_breakdown/lm_loss": 6.170992492116056e-06, "loss_breakdown/pointer_loss": 0.07637928426265717, "step": 5210 }, { "epoch": 0.5312870454168844, "loss_breakdown/lm_loss": 1.0291444596077781e-05, "loss_breakdown/pointer_loss": 0.06136365234851837, "step": 5210 }, { "epoch": 0.532306790225746, "grad_norm": 5.863242681496342, "learning_rate": 2.59886685552408e-06, "loss": 0.2963, "step": 5220 }, { "epoch": 0.532306790225746, "loss_breakdown/lm_loss": 1.0063047739095055e-05, "loss_breakdown/pointer_loss": 0.2687753438949585, "step": 5220 }, { "epoch": 0.532306790225746, "loss_breakdown/lm_loss": 9.956186659110244e-06, "loss_breakdown/pointer_loss": 0.3763139247894287, "step": 5220 }, { "epoch": 0.532306790225746, "loss_breakdown/lm_loss": 7.886340426921379e-06, "loss_breakdown/pointer_loss": 0.29315900802612305, "step": 5220 }, { "epoch": 0.532306790225746, "loss_breakdown/lm_loss": 2.664199382707011e-05, "loss_breakdown/pointer_loss": 0.5464678406715393, "step": 5220 }, { "epoch": 0.532306790225746, "loss_breakdown/lm_loss": 7.185398317233194e-06, "loss_breakdown/pointer_loss": 0.6509582996368408, "step": 5220 }, { "epoch": 0.532306790225746, "loss_breakdown/lm_loss": 6.890626536915079e-06, "loss_breakdown/pointer_loss": 0.29525813460350037, "step": 5220 }, { "epoch": 0.532306790225746, "loss_breakdown/lm_loss": 6.273174221860245e-06, "loss_breakdown/pointer_loss": 0.24090610444545746, "step": 5220 }, { "epoch": 0.532306790225746, "loss_breakdown/lm_loss": 9.430881618754938e-06, "loss_breakdown/pointer_loss": 1.7596527338027954, "step": 5220 }, { "epoch": 0.5333265350346076, "grad_norm": 10.979950510209918, "learning_rate": 2.593201133144476e-06, "loss": 0.2648, "step": 5230 }, { "epoch": 0.5333265350346076, "loss_breakdown/lm_loss": 7.982907845871523e-06, "loss_breakdown/pointer_loss": 0.021436937153339386, "step": 5230 }, { "epoch": 0.5333265350346076, "loss_breakdown/lm_loss": 6.949811449885601e-06, "loss_breakdown/pointer_loss": 0.16324488818645477, "step": 5230 }, { "epoch": 0.5333265350346076, "loss_breakdown/lm_loss": 1.3263398614071775e-05, "loss_breakdown/pointer_loss": 0.26036298274993896, "step": 5230 }, { "epoch": 0.5333265350346076, "loss_breakdown/lm_loss": 2.0397450498421676e-05, "loss_breakdown/pointer_loss": 0.40223681926727295, "step": 5230 }, { "epoch": 0.5333265350346076, "loss_breakdown/lm_loss": 7.176288818300236e-06, "loss_breakdown/pointer_loss": 0.0488395094871521, "step": 5230 }, { "epoch": 0.5333265350346076, "loss_breakdown/lm_loss": 1.1364122656232212e-05, "loss_breakdown/pointer_loss": 0.14886188507080078, "step": 5230 }, { "epoch": 0.5333265350346076, "loss_breakdown/lm_loss": 1.2143007552367635e-05, "loss_breakdown/pointer_loss": 3.0737335681915283, "step": 5230 }, { "epoch": 0.5333265350346076, "loss_breakdown/lm_loss": 5.489679460879415e-05, "loss_breakdown/pointer_loss": 0.09087996184825897, "step": 5230 }, { "epoch": 0.5343462798434692, "grad_norm": 3.2591861800875015, "learning_rate": 2.5875354107648726e-06, "loss": 0.3137, "step": 5240 }, { "epoch": 0.5343462798434692, "loss_breakdown/lm_loss": 8.139186320477165e-06, "loss_breakdown/pointer_loss": 0.4744983911514282, "step": 5240 }, { "epoch": 0.5343462798434692, "loss_breakdown/lm_loss": 7.390876817225944e-06, "loss_breakdown/pointer_loss": 0.2523624897003174, "step": 5240 }, { "epoch": 0.5343462798434692, "loss_breakdown/lm_loss": 8.280871043098159e-06, "loss_breakdown/pointer_loss": 0.1975902020931244, "step": 5240 }, { "epoch": 0.5343462798434692, "loss_breakdown/lm_loss": 7.011709385551512e-06, "loss_breakdown/pointer_loss": 0.13047590851783752, "step": 5240 }, { "epoch": 0.5343462798434692, "loss_breakdown/lm_loss": 7.90774811321171e-06, "loss_breakdown/pointer_loss": 0.21055638790130615, "step": 5240 }, { "epoch": 0.5343462798434692, "loss_breakdown/lm_loss": 7.546889264631318e-06, "loss_breakdown/pointer_loss": 0.8446502685546875, "step": 5240 }, { "epoch": 0.5343462798434692, "loss_breakdown/lm_loss": 7.227497917483561e-06, "loss_breakdown/pointer_loss": 0.29406410455703735, "step": 5240 }, { "epoch": 0.5343462798434692, "loss_breakdown/lm_loss": 8.84519158717012e-06, "loss_breakdown/pointer_loss": 0.12037567794322968, "step": 5240 }, { "epoch": 0.5353660246523307, "grad_norm": 7.283276889801979, "learning_rate": 2.5818696883852695e-06, "loss": 0.2746, "step": 5250 }, { "epoch": 0.5353660246523307, "loss_breakdown/lm_loss": 4.505943434196524e-05, "loss_breakdown/pointer_loss": 2.635761022567749, "step": 5250 }, { "epoch": 0.5353660246523307, "loss_breakdown/lm_loss": 1.4253279914555606e-05, "loss_breakdown/pointer_loss": 0.7576735019683838, "step": 5250 }, { "epoch": 0.5353660246523307, "loss_breakdown/lm_loss": 1.4187529814080335e-05, "loss_breakdown/pointer_loss": 0.838118314743042, "step": 5250 }, { "epoch": 0.5353660246523307, "loss_breakdown/lm_loss": 1.4252177606977057e-05, "loss_breakdown/pointer_loss": 0.5187467336654663, "step": 5250 }, { "epoch": 0.5353660246523307, "loss_breakdown/lm_loss": 2.3608228730154224e-05, "loss_breakdown/pointer_loss": 0.5406630635261536, "step": 5250 }, { "epoch": 0.5353660246523307, "loss_breakdown/lm_loss": 1.4593140804208815e-05, "loss_breakdown/pointer_loss": 0.9785472750663757, "step": 5250 }, { "epoch": 0.5353660246523307, "loss_breakdown/lm_loss": 8.987030014395714e-06, "loss_breakdown/pointer_loss": 1.0163609981536865, "step": 5250 }, { "epoch": 0.5353660246523307, "loss_breakdown/lm_loss": 8.032242476474494e-06, "loss_breakdown/pointer_loss": 0.35383304953575134, "step": 5250 }, { "epoch": 0.5363857694611923, "grad_norm": 4.086938132152789, "learning_rate": 2.576203966005666e-06, "loss": 0.2646, "step": 5260 }, { "epoch": 0.5363857694611923, "loss_breakdown/lm_loss": 8.907747542252764e-06, "loss_breakdown/pointer_loss": 0.8098388910293579, "step": 5260 }, { "epoch": 0.5363857694611923, "loss_breakdown/lm_loss": 8.501830961904489e-06, "loss_breakdown/pointer_loss": 0.7003198862075806, "step": 5260 }, { "epoch": 0.5363857694611923, "loss_breakdown/lm_loss": 7.1961771936912555e-06, "loss_breakdown/pointer_loss": 0.28368034958839417, "step": 5260 }, { "epoch": 0.5363857694611923, "loss_breakdown/lm_loss": 1.4566951904271264e-05, "loss_breakdown/pointer_loss": 0.2029857635498047, "step": 5260 }, { "epoch": 0.5363857694611923, "loss_breakdown/lm_loss": 2.938241232186556e-05, "loss_breakdown/pointer_loss": 0.682948648929596, "step": 5260 }, { "epoch": 0.5363857694611923, "loss_breakdown/lm_loss": 7.435916359099792e-06, "loss_breakdown/pointer_loss": 1.156754732131958, "step": 5260 }, { "epoch": 0.5363857694611923, "loss_breakdown/lm_loss": 9.540569408272859e-06, "loss_breakdown/pointer_loss": 0.6919106245040894, "step": 5260 }, { "epoch": 0.5363857694611923, "loss_breakdown/lm_loss": 1.6346208212780766e-05, "loss_breakdown/pointer_loss": 0.9101120829582214, "step": 5260 }, { "epoch": 0.5374055142700539, "grad_norm": 2.8177800374775552, "learning_rate": 2.5705382436260625e-06, "loss": 0.3036, "step": 5270 }, { "epoch": 0.5374055142700539, "loss_breakdown/lm_loss": 1.0477072464709636e-05, "loss_breakdown/pointer_loss": 0.3636862635612488, "step": 5270 }, { "epoch": 0.5374055142700539, "loss_breakdown/lm_loss": 6.588910309801577e-06, "loss_breakdown/pointer_loss": 0.34489330649375916, "step": 5270 }, { "epoch": 0.5374055142700539, "loss_breakdown/lm_loss": 9.526505891699344e-06, "loss_breakdown/pointer_loss": 0.21554508805274963, "step": 5270 }, { "epoch": 0.5374055142700539, "loss_breakdown/lm_loss": 8.052285920712166e-06, "loss_breakdown/pointer_loss": 1.2701122760772705, "step": 5270 }, { "epoch": 0.5374055142700539, "loss_breakdown/lm_loss": 9.660991054261103e-06, "loss_breakdown/pointer_loss": 0.5547676086425781, "step": 5270 }, { "epoch": 0.5374055142700539, "loss_breakdown/lm_loss": 7.80707341618836e-06, "loss_breakdown/pointer_loss": 1.3038290739059448, "step": 5270 }, { "epoch": 0.5374055142700539, "loss_breakdown/lm_loss": 7.402177743642824e-06, "loss_breakdown/pointer_loss": 0.8037005066871643, "step": 5270 }, { "epoch": 0.5374055142700539, "loss_breakdown/lm_loss": 6.549553745571757e-06, "loss_breakdown/pointer_loss": 0.4384187161922455, "step": 5270 }, { "epoch": 0.5384252590789155, "grad_norm": 7.218970041704773, "learning_rate": 2.5648725212464594e-06, "loss": 0.2569, "step": 5280 }, { "epoch": 0.5384252590789155, "loss_breakdown/lm_loss": 1.4516264855046757e-05, "loss_breakdown/pointer_loss": 1.524742603302002, "step": 5280 }, { "epoch": 0.5384252590789155, "loss_breakdown/lm_loss": 8.61471562529914e-06, "loss_breakdown/pointer_loss": 0.14190194010734558, "step": 5280 }, { "epoch": 0.5384252590789155, "loss_breakdown/lm_loss": 8.47960382088786e-06, "loss_breakdown/pointer_loss": 0.20599794387817383, "step": 5280 }, { "epoch": 0.5384252590789155, "loss_breakdown/lm_loss": 6.600136657652911e-06, "loss_breakdown/pointer_loss": 0.28683629631996155, "step": 5280 }, { "epoch": 0.5384252590789155, "loss_breakdown/lm_loss": 7.812018338881899e-06, "loss_breakdown/pointer_loss": 0.06292438507080078, "step": 5280 }, { "epoch": 0.5384252590789155, "loss_breakdown/lm_loss": 1.0370782547397539e-05, "loss_breakdown/pointer_loss": 0.06449532508850098, "step": 5280 }, { "epoch": 0.5384252590789155, "loss_breakdown/lm_loss": 8.511415217071772e-06, "loss_breakdown/pointer_loss": 0.7889281511306763, "step": 5280 }, { "epoch": 0.5384252590789155, "loss_breakdown/lm_loss": 1.0565526281425264e-05, "loss_breakdown/pointer_loss": 0.5506214499473572, "step": 5280 }, { "epoch": 0.539445003887777, "grad_norm": 3.185649185590658, "learning_rate": 2.5592067988668555e-06, "loss": 0.3002, "step": 5290 }, { "epoch": 0.539445003887777, "loss_breakdown/lm_loss": 6.205270437931176e-06, "loss_breakdown/pointer_loss": 0.3618967533111572, "step": 5290 }, { "epoch": 0.539445003887777, "loss_breakdown/lm_loss": 7.875180017435923e-06, "loss_breakdown/pointer_loss": 0.14449545741081238, "step": 5290 }, { "epoch": 0.539445003887777, "loss_breakdown/lm_loss": 8.522655662090983e-06, "loss_breakdown/pointer_loss": 0.4893869161605835, "step": 5290 }, { "epoch": 0.539445003887777, "loss_breakdown/lm_loss": 7.027043011476053e-06, "loss_breakdown/pointer_loss": 0.18903736770153046, "step": 5290 }, { "epoch": 0.539445003887777, "loss_breakdown/lm_loss": 7.3523647188267205e-06, "loss_breakdown/pointer_loss": 0.19416536390781403, "step": 5290 }, { "epoch": 0.539445003887777, "loss_breakdown/lm_loss": 7.6147248364577536e-06, "loss_breakdown/pointer_loss": 0.18078649044036865, "step": 5290 }, { "epoch": 0.539445003887777, "loss_breakdown/lm_loss": 1.0075435966427904e-05, "loss_breakdown/pointer_loss": 0.18181931972503662, "step": 5290 }, { "epoch": 0.539445003887777, "loss_breakdown/lm_loss": 6.238552032300504e-06, "loss_breakdown/pointer_loss": 0.19990140199661255, "step": 5290 }, { "epoch": 0.5404647486966386, "grad_norm": 12.170536568487163, "learning_rate": 2.5535410764872524e-06, "loss": 0.2741, "step": 5300 }, { "epoch": 0.5404647486966386, "loss_breakdown/lm_loss": 4.740965960081667e-05, "loss_breakdown/pointer_loss": 2.288315534591675, "step": 5300 }, { "epoch": 0.5404647486966386, "loss_breakdown/lm_loss": 1.711634649836924e-05, "loss_breakdown/pointer_loss": 0.5157116651535034, "step": 5300 }, { "epoch": 0.5404647486966386, "loss_breakdown/lm_loss": 1.1844444088637829e-05, "loss_breakdown/pointer_loss": 0.5912133455276489, "step": 5300 }, { "epoch": 0.5404647486966386, "loss_breakdown/lm_loss": 1.0298983397660777e-05, "loss_breakdown/pointer_loss": 1.2751448154449463, "step": 5300 }, { "epoch": 0.5404647486966386, "loss_breakdown/lm_loss": 1.1071100743720308e-05, "loss_breakdown/pointer_loss": 0.7414968609809875, "step": 5300 }, { "epoch": 0.5404647486966386, "loss_breakdown/lm_loss": 1.01891637314111e-05, "loss_breakdown/pointer_loss": 0.48825037479400635, "step": 5300 }, { "epoch": 0.5404647486966386, "loss_breakdown/lm_loss": 9.90501393971499e-06, "loss_breakdown/pointer_loss": 1.0020062923431396, "step": 5300 }, { "epoch": 0.5404647486966386, "loss_breakdown/lm_loss": 7.748154530418105e-06, "loss_breakdown/pointer_loss": 0.9880059361457825, "step": 5300 }, { "epoch": 0.5414844935055002, "grad_norm": 4.65147960078044, "learning_rate": 2.547875354107649e-06, "loss": 0.2772, "step": 5310 }, { "epoch": 0.5414844935055002, "loss_breakdown/lm_loss": 9.528584087092895e-06, "loss_breakdown/pointer_loss": 0.04053747281432152, "step": 5310 }, { "epoch": 0.5414844935055002, "loss_breakdown/lm_loss": 7.371012543444522e-06, "loss_breakdown/pointer_loss": 0.9256628751754761, "step": 5310 }, { "epoch": 0.5414844935055002, "loss_breakdown/lm_loss": 8.218952643801458e-06, "loss_breakdown/pointer_loss": 0.30314382910728455, "step": 5310 }, { "epoch": 0.5414844935055002, "loss_breakdown/lm_loss": 7.788223228999414e-06, "loss_breakdown/pointer_loss": 0.2002616822719574, "step": 5310 }, { "epoch": 0.5414844935055002, "loss_breakdown/lm_loss": 8.817384696158115e-06, "loss_breakdown/pointer_loss": 0.15894201397895813, "step": 5310 }, { "epoch": 0.5414844935055002, "loss_breakdown/lm_loss": 7.402796200040029e-06, "loss_breakdown/pointer_loss": 0.3165002465248108, "step": 5310 }, { "epoch": 0.5414844935055002, "loss_breakdown/lm_loss": 7.012045443843817e-06, "loss_breakdown/pointer_loss": 0.07800193130970001, "step": 5310 }, { "epoch": 0.5414844935055002, "loss_breakdown/lm_loss": 1.055375651048962e-05, "loss_breakdown/pointer_loss": 0.42476218938827515, "step": 5310 }, { "epoch": 0.5425042383143618, "grad_norm": 3.3572550825637193, "learning_rate": 2.5422096317280454e-06, "loss": 0.302, "step": 5320 }, { "epoch": 0.5425042383143618, "loss_breakdown/lm_loss": 9.151201993518043e-06, "loss_breakdown/pointer_loss": 0.1419275403022766, "step": 5320 }, { "epoch": 0.5425042383143618, "loss_breakdown/lm_loss": 7.613102297909791e-06, "loss_breakdown/pointer_loss": 0.3423328399658203, "step": 5320 }, { "epoch": 0.5425042383143618, "loss_breakdown/lm_loss": 8.444770173809957e-06, "loss_breakdown/pointer_loss": 1.209463119506836, "step": 5320 }, { "epoch": 0.5425042383143618, "loss_breakdown/lm_loss": 1.5980187527020462e-05, "loss_breakdown/pointer_loss": 0.9275815486907959, "step": 5320 }, { "epoch": 0.5425042383143618, "loss_breakdown/lm_loss": 9.051282177097164e-06, "loss_breakdown/pointer_loss": 0.2146376222372055, "step": 5320 }, { "epoch": 0.5425042383143618, "loss_breakdown/lm_loss": 6.286792540777242e-06, "loss_breakdown/pointer_loss": 0.6578179001808167, "step": 5320 }, { "epoch": 0.5425042383143618, "loss_breakdown/lm_loss": 7.786391506670043e-06, "loss_breakdown/pointer_loss": 0.46424663066864014, "step": 5320 }, { "epoch": 0.5425042383143618, "loss_breakdown/lm_loss": 1.1326348612783477e-05, "loss_breakdown/pointer_loss": 0.25705039501190186, "step": 5320 }, { "epoch": 0.5435239831232234, "grad_norm": 23.02934072429159, "learning_rate": 2.5365439093484424e-06, "loss": 0.2729, "step": 5330 }, { "epoch": 0.5435239831232234, "loss_breakdown/lm_loss": 1.301723750657402e-05, "loss_breakdown/pointer_loss": 0.554268479347229, "step": 5330 }, { "epoch": 0.5435239831232234, "loss_breakdown/lm_loss": 8.28492375148926e-06, "loss_breakdown/pointer_loss": 0.2906642258167267, "step": 5330 }, { "epoch": 0.5435239831232234, "loss_breakdown/lm_loss": 7.927296792331617e-06, "loss_breakdown/pointer_loss": 0.11856107413768768, "step": 5330 }, { "epoch": 0.5435239831232234, "loss_breakdown/lm_loss": 2.0632023733924143e-05, "loss_breakdown/pointer_loss": 1.8077654838562012, "step": 5330 }, { "epoch": 0.5435239831232234, "loss_breakdown/lm_loss": 1.1733898645616136e-05, "loss_breakdown/pointer_loss": 0.024247657507658005, "step": 5330 }, { "epoch": 0.5435239831232234, "loss_breakdown/lm_loss": 1.3200070497987326e-05, "loss_breakdown/pointer_loss": 0.5211443901062012, "step": 5330 }, { "epoch": 0.5435239831232234, "loss_breakdown/lm_loss": 1.2751085705531295e-05, "loss_breakdown/pointer_loss": 1.110640287399292, "step": 5330 }, { "epoch": 0.5435239831232234, "loss_breakdown/lm_loss": 2.3338217943091877e-05, "loss_breakdown/pointer_loss": 0.2442467212677002, "step": 5330 }, { "epoch": 0.5445437279320849, "grad_norm": 7.028150540896167, "learning_rate": 2.530878186968839e-06, "loss": 0.2754, "step": 5340 }, { "epoch": 0.5445437279320849, "loss_breakdown/lm_loss": 7.569891295133857e-06, "loss_breakdown/pointer_loss": 0.44267594814300537, "step": 5340 }, { "epoch": 0.5445437279320849, "loss_breakdown/lm_loss": 9.908744686981663e-06, "loss_breakdown/pointer_loss": 0.6697245836257935, "step": 5340 }, { "epoch": 0.5445437279320849, "loss_breakdown/lm_loss": 8.757662726566195e-06, "loss_breakdown/pointer_loss": 0.16649490594863892, "step": 5340 }, { "epoch": 0.5445437279320849, "loss_breakdown/lm_loss": 7.289535460586194e-06, "loss_breakdown/pointer_loss": 0.4576151669025421, "step": 5340 }, { "epoch": 0.5445437279320849, "loss_breakdown/lm_loss": 7.920858479337767e-06, "loss_breakdown/pointer_loss": 0.5441673398017883, "step": 5340 }, { "epoch": 0.5445437279320849, "loss_breakdown/lm_loss": 6.3851243794488255e-06, "loss_breakdown/pointer_loss": 0.44986093044281006, "step": 5340 }, { "epoch": 0.5445437279320849, "loss_breakdown/lm_loss": 8.014715604076628e-06, "loss_breakdown/pointer_loss": 0.43646568059921265, "step": 5340 }, { "epoch": 0.5445437279320849, "loss_breakdown/lm_loss": 7.19036734153633e-06, "loss_breakdown/pointer_loss": 0.4835246801376343, "step": 5340 }, { "epoch": 0.5455634727409466, "grad_norm": 5.164107768097737, "learning_rate": 2.525212464589235e-06, "loss": 0.2767, "step": 5350 }, { "epoch": 0.5455634727409466, "loss_breakdown/lm_loss": 3.242971797590144e-05, "loss_breakdown/pointer_loss": 1.271080493927002, "step": 5350 }, { "epoch": 0.5455634727409466, "loss_breakdown/lm_loss": 3.7285244616214186e-05, "loss_breakdown/pointer_loss": 0.6112114191055298, "step": 5350 }, { "epoch": 0.5455634727409466, "loss_breakdown/lm_loss": 1.2623977454495616e-05, "loss_breakdown/pointer_loss": 0.501889705657959, "step": 5350 }, { "epoch": 0.5455634727409466, "loss_breakdown/lm_loss": 1.0566335731709842e-05, "loss_breakdown/pointer_loss": 0.20633086562156677, "step": 5350 }, { "epoch": 0.5455634727409466, "loss_breakdown/lm_loss": 1.2662456356338225e-05, "loss_breakdown/pointer_loss": 0.8085973262786865, "step": 5350 }, { "epoch": 0.5455634727409466, "loss_breakdown/lm_loss": 1.1894077942997683e-05, "loss_breakdown/pointer_loss": 0.4817873537540436, "step": 5350 }, { "epoch": 0.5455634727409466, "loss_breakdown/lm_loss": 1.1083800927735865e-05, "loss_breakdown/pointer_loss": 0.8591817021369934, "step": 5350 }, { "epoch": 0.5455634727409466, "loss_breakdown/lm_loss": 8.269532372651156e-06, "loss_breakdown/pointer_loss": 0.28795620799064636, "step": 5350 }, { "epoch": 0.5465832175498082, "grad_norm": 5.68948258684531, "learning_rate": 2.519546742209632e-06, "loss": 0.2803, "step": 5360 }, { "epoch": 0.5465832175498082, "loss_breakdown/lm_loss": 7.658417416678276e-06, "loss_breakdown/pointer_loss": 0.7793558835983276, "step": 5360 }, { "epoch": 0.5465832175498082, "loss_breakdown/lm_loss": 7.0795945248391945e-06, "loss_breakdown/pointer_loss": 0.21828025579452515, "step": 5360 }, { "epoch": 0.5465832175498082, "loss_breakdown/lm_loss": 9.604880688129924e-06, "loss_breakdown/pointer_loss": 0.7634989023208618, "step": 5360 }, { "epoch": 0.5465832175498082, "loss_breakdown/lm_loss": 9.498820872977376e-06, "loss_breakdown/pointer_loss": 0.2411971241235733, "step": 5360 }, { "epoch": 0.5465832175498082, "loss_breakdown/lm_loss": 9.019944627652876e-06, "loss_breakdown/pointer_loss": 1.894515037536621, "step": 5360 }, { "epoch": 0.5465832175498082, "loss_breakdown/lm_loss": 8.296831765619572e-06, "loss_breakdown/pointer_loss": 0.25232142210006714, "step": 5360 }, { "epoch": 0.5465832175498082, "loss_breakdown/lm_loss": 1.1980175258941017e-05, "loss_breakdown/pointer_loss": 0.20131772756576538, "step": 5360 }, { "epoch": 0.5465832175498082, "loss_breakdown/lm_loss": 8.582938789913896e-06, "loss_breakdown/pointer_loss": 0.4944039583206177, "step": 5360 }, { "epoch": 0.5476029623586698, "grad_norm": 2.863616145014128, "learning_rate": 2.5138810198300284e-06, "loss": 0.2826, "step": 5370 }, { "epoch": 0.5476029623586698, "loss_breakdown/lm_loss": 1.4000514056533575e-05, "loss_breakdown/pointer_loss": 1.5727450847625732, "step": 5370 }, { "epoch": 0.5476029623586698, "loss_breakdown/lm_loss": 1.204752697958611e-05, "loss_breakdown/pointer_loss": 0.10913790762424469, "step": 5370 }, { "epoch": 0.5476029623586698, "loss_breakdown/lm_loss": 7.214896868390497e-06, "loss_breakdown/pointer_loss": 0.31215962767601013, "step": 5370 }, { "epoch": 0.5476029623586698, "loss_breakdown/lm_loss": 8.153227099683136e-06, "loss_breakdown/pointer_loss": 0.2718348801136017, "step": 5370 }, { "epoch": 0.5476029623586698, "loss_breakdown/lm_loss": 8.182753845176194e-06, "loss_breakdown/pointer_loss": 0.723112165927887, "step": 5370 }, { "epoch": 0.5476029623586698, "loss_breakdown/lm_loss": 7.0913060881139245e-06, "loss_breakdown/pointer_loss": 0.43829694390296936, "step": 5370 }, { "epoch": 0.5476029623586698, "loss_breakdown/lm_loss": 7.763698704366107e-06, "loss_breakdown/pointer_loss": 1.0190612077713013, "step": 5370 }, { "epoch": 0.5476029623586698, "loss_breakdown/lm_loss": 8.016001629584935e-06, "loss_breakdown/pointer_loss": 0.6019670963287354, "step": 5370 }, { "epoch": 0.5486227071675314, "grad_norm": 4.933081529113081, "learning_rate": 2.5082152974504253e-06, "loss": 0.2632, "step": 5380 }, { "epoch": 0.5486227071675314, "loss_breakdown/lm_loss": 1.822223566705361e-05, "loss_breakdown/pointer_loss": 1.4667041301727295, "step": 5380 }, { "epoch": 0.5486227071675314, "loss_breakdown/lm_loss": 8.217361028073356e-06, "loss_breakdown/pointer_loss": 0.10951600968837738, "step": 5380 }, { "epoch": 0.5486227071675314, "loss_breakdown/lm_loss": 8.769673513597809e-06, "loss_breakdown/pointer_loss": 2.6870956420898438, "step": 5380 }, { "epoch": 0.5486227071675314, "loss_breakdown/lm_loss": 9.349731953989249e-06, "loss_breakdown/pointer_loss": 0.17778673768043518, "step": 5380 }, { "epoch": 0.5486227071675314, "loss_breakdown/lm_loss": 1.2885982869192958e-05, "loss_breakdown/pointer_loss": 0.08993528038263321, "step": 5380 }, { "epoch": 0.5486227071675314, "loss_breakdown/lm_loss": 1.0537818525335751e-05, "loss_breakdown/pointer_loss": 0.09184929728507996, "step": 5380 }, { "epoch": 0.5486227071675314, "loss_breakdown/lm_loss": 1.3879294783691876e-05, "loss_breakdown/pointer_loss": 0.17495298385620117, "step": 5380 }, { "epoch": 0.5486227071675314, "loss_breakdown/lm_loss": 1.183316362585174e-05, "loss_breakdown/pointer_loss": 0.1418137550354004, "step": 5380 }, { "epoch": 0.5496424519763929, "grad_norm": 2.257268197988596, "learning_rate": 2.502549575070822e-06, "loss": 0.2919, "step": 5390 }, { "epoch": 0.5496424519763929, "loss_breakdown/lm_loss": 8.668729606142733e-06, "loss_breakdown/pointer_loss": 0.23714083433151245, "step": 5390 }, { "epoch": 0.5496424519763929, "loss_breakdown/lm_loss": 8.701302249392029e-06, "loss_breakdown/pointer_loss": 0.42571866512298584, "step": 5390 }, { "epoch": 0.5496424519763929, "loss_breakdown/lm_loss": 1.2706229426839855e-05, "loss_breakdown/pointer_loss": 0.2707116901874542, "step": 5390 }, { "epoch": 0.5496424519763929, "loss_breakdown/lm_loss": 1.2464769497455563e-05, "loss_breakdown/pointer_loss": 0.8208645582199097, "step": 5390 }, { "epoch": 0.5496424519763929, "loss_breakdown/lm_loss": 9.940511517925188e-06, "loss_breakdown/pointer_loss": 0.12708032131195068, "step": 5390 }, { "epoch": 0.5496424519763929, "loss_breakdown/lm_loss": 1.0199039934377652e-05, "loss_breakdown/pointer_loss": 0.4691070020198822, "step": 5390 }, { "epoch": 0.5496424519763929, "loss_breakdown/lm_loss": 1.2136539226048626e-05, "loss_breakdown/pointer_loss": 0.2794896960258484, "step": 5390 }, { "epoch": 0.5496424519763929, "loss_breakdown/lm_loss": 9.872655027720612e-06, "loss_breakdown/pointer_loss": 0.2583022713661194, "step": 5390 }, { "epoch": 0.5506621967852545, "grad_norm": 5.242794342700459, "learning_rate": 2.4968838526912183e-06, "loss": 0.2889, "step": 5400 }, { "epoch": 0.5506621967852545, "loss_breakdown/lm_loss": 0.0001143076951848343, "loss_breakdown/pointer_loss": 2.6203413009643555, "step": 5400 }, { "epoch": 0.5506621967852545, "loss_breakdown/lm_loss": 2.954626143036876e-05, "loss_breakdown/pointer_loss": 0.6185932755470276, "step": 5400 }, { "epoch": 0.5506621967852545, "loss_breakdown/lm_loss": 1.8677885236684233e-05, "loss_breakdown/pointer_loss": 0.5574179291725159, "step": 5400 }, { "epoch": 0.5506621967852545, "loss_breakdown/lm_loss": 1.405733019055333e-05, "loss_breakdown/pointer_loss": 0.850999653339386, "step": 5400 }, { "epoch": 0.5506621967852545, "loss_breakdown/lm_loss": 1.7316984667559154e-05, "loss_breakdown/pointer_loss": 1.5365941524505615, "step": 5400 }, { "epoch": 0.5506621967852545, "loss_breakdown/lm_loss": 1.2678703569690697e-05, "loss_breakdown/pointer_loss": 0.5034167766571045, "step": 5400 }, { "epoch": 0.5506621967852545, "loss_breakdown/lm_loss": 1.260862063645618e-05, "loss_breakdown/pointer_loss": 0.8163149356842041, "step": 5400 }, { "epoch": 0.5506621967852545, "loss_breakdown/lm_loss": 8.38567575556226e-06, "loss_breakdown/pointer_loss": 0.4694744348526001, "step": 5400 }, { "epoch": 0.5516819415941161, "grad_norm": 4.189567735381666, "learning_rate": 2.491218130311615e-06, "loss": 0.2819, "step": 5410 }, { "epoch": 0.5516819415941161, "loss_breakdown/lm_loss": 1.0789523003040813e-05, "loss_breakdown/pointer_loss": 0.13076861202716827, "step": 5410 }, { "epoch": 0.5516819415941161, "loss_breakdown/lm_loss": 7.756442755635362e-06, "loss_breakdown/pointer_loss": 2.7212204933166504, "step": 5410 }, { "epoch": 0.5516819415941161, "loss_breakdown/lm_loss": 8.118014193314593e-06, "loss_breakdown/pointer_loss": 0.24599015712738037, "step": 5410 }, { "epoch": 0.5516819415941161, "loss_breakdown/lm_loss": 7.462396752089262e-06, "loss_breakdown/pointer_loss": 0.12235347926616669, "step": 5410 }, { "epoch": 0.5516819415941161, "loss_breakdown/lm_loss": 1.153521407104563e-05, "loss_breakdown/pointer_loss": 0.209837406873703, "step": 5410 }, { "epoch": 0.5516819415941161, "loss_breakdown/lm_loss": 1.0861648661375511e-05, "loss_breakdown/pointer_loss": 0.15328721702098846, "step": 5410 }, { "epoch": 0.5516819415941161, "loss_breakdown/lm_loss": 2.4589768145233393e-05, "loss_breakdown/pointer_loss": 0.10290698707103729, "step": 5410 }, { "epoch": 0.5516819415941161, "loss_breakdown/lm_loss": 1.1209294825675897e-05, "loss_breakdown/pointer_loss": 4.528114318847656, "step": 5410 }, { "epoch": 0.5527016864029777, "grad_norm": 5.0098278309410205, "learning_rate": 2.4855524079320113e-06, "loss": 0.2864, "step": 5420 }, { "epoch": 0.5527016864029777, "loss_breakdown/lm_loss": 8.677699042891618e-06, "loss_breakdown/pointer_loss": 1.0380847454071045, "step": 5420 }, { "epoch": 0.5527016864029777, "loss_breakdown/lm_loss": 8.489770152664278e-06, "loss_breakdown/pointer_loss": 0.4033023715019226, "step": 5420 }, { "epoch": 0.5527016864029777, "loss_breakdown/lm_loss": 1.1547320355020929e-05, "loss_breakdown/pointer_loss": 0.564998984336853, "step": 5420 }, { "epoch": 0.5527016864029777, "loss_breakdown/lm_loss": 7.717133485130034e-06, "loss_breakdown/pointer_loss": 0.7465578317642212, "step": 5420 }, { "epoch": 0.5527016864029777, "loss_breakdown/lm_loss": 6.804668373661116e-06, "loss_breakdown/pointer_loss": 0.4167945384979248, "step": 5420 }, { "epoch": 0.5527016864029777, "loss_breakdown/lm_loss": 1.115755640057614e-05, "loss_breakdown/pointer_loss": 0.20401820540428162, "step": 5420 }, { "epoch": 0.5527016864029777, "loss_breakdown/lm_loss": 1.333066938968841e-05, "loss_breakdown/pointer_loss": 0.40113043785095215, "step": 5420 }, { "epoch": 0.5527016864029777, "loss_breakdown/lm_loss": 9.845582098932937e-06, "loss_breakdown/pointer_loss": 0.6522382497787476, "step": 5420 }, { "epoch": 0.5537214312118393, "grad_norm": 11.17541174006572, "learning_rate": 2.4798866855524083e-06, "loss": 0.2528, "step": 5430 }, { "epoch": 0.5537214312118393, "loss_breakdown/lm_loss": 8.77764614415355e-06, "loss_breakdown/pointer_loss": 0.10447761416435242, "step": 5430 }, { "epoch": 0.5537214312118393, "loss_breakdown/lm_loss": 7.307433406822383e-06, "loss_breakdown/pointer_loss": 0.18897509574890137, "step": 5430 }, { "epoch": 0.5537214312118393, "loss_breakdown/lm_loss": 1.1189483302587178e-05, "loss_breakdown/pointer_loss": 2.5241787433624268, "step": 5430 }, { "epoch": 0.5537214312118393, "loss_breakdown/lm_loss": 1.258821885130601e-05, "loss_breakdown/pointer_loss": 0.8776216506958008, "step": 5430 }, { "epoch": 0.5537214312118393, "loss_breakdown/lm_loss": 8.698149031260982e-06, "loss_breakdown/pointer_loss": 0.07643638551235199, "step": 5430 }, { "epoch": 0.5537214312118393, "loss_breakdown/lm_loss": 1.2262299605936278e-05, "loss_breakdown/pointer_loss": 0.04981791228055954, "step": 5430 }, { "epoch": 0.5537214312118393, "loss_breakdown/lm_loss": 1.447912654839456e-05, "loss_breakdown/pointer_loss": 0.2379821538925171, "step": 5430 }, { "epoch": 0.5537214312118393, "loss_breakdown/lm_loss": 8.622667337476742e-06, "loss_breakdown/pointer_loss": 0.259807825088501, "step": 5430 }, { "epoch": 0.5547411760207008, "grad_norm": 2.4953613584991197, "learning_rate": 2.4742209631728048e-06, "loss": 0.2942, "step": 5440 }, { "epoch": 0.5547411760207008, "loss_breakdown/lm_loss": 6.2113895182847045e-06, "loss_breakdown/pointer_loss": 0.13224440813064575, "step": 5440 }, { "epoch": 0.5547411760207008, "loss_breakdown/lm_loss": 8.723649443709292e-06, "loss_breakdown/pointer_loss": 0.7686137557029724, "step": 5440 }, { "epoch": 0.5547411760207008, "loss_breakdown/lm_loss": 8.523274118488189e-06, "loss_breakdown/pointer_loss": 0.20791545510292053, "step": 5440 }, { "epoch": 0.5547411760207008, "loss_breakdown/lm_loss": 8.359388630196918e-06, "loss_breakdown/pointer_loss": 0.5289491415023804, "step": 5440 }, { "epoch": 0.5547411760207008, "loss_breakdown/lm_loss": 7.974997060955502e-06, "loss_breakdown/pointer_loss": 0.39518827199935913, "step": 5440 }, { "epoch": 0.5547411760207008, "loss_breakdown/lm_loss": 7.512346201110631e-06, "loss_breakdown/pointer_loss": 0.21747754514217377, "step": 5440 }, { "epoch": 0.5547411760207008, "loss_breakdown/lm_loss": 9.457725354877766e-06, "loss_breakdown/pointer_loss": 0.23973554372787476, "step": 5440 }, { "epoch": 0.5547411760207008, "loss_breakdown/lm_loss": 7.58358964958461e-06, "loss_breakdown/pointer_loss": 0.08938856422901154, "step": 5440 }, { "epoch": 0.5557609208295624, "grad_norm": 7.1053330094777944, "learning_rate": 2.4685552407932013e-06, "loss": 0.257, "step": 5450 }, { "epoch": 0.5557609208295624, "loss_breakdown/lm_loss": 5.1597766287159175e-05, "loss_breakdown/pointer_loss": 2.126532793045044, "step": 5450 }, { "epoch": 0.5557609208295624, "loss_breakdown/lm_loss": 1.9629784219432622e-05, "loss_breakdown/pointer_loss": 0.40347474813461304, "step": 5450 }, { "epoch": 0.5557609208295624, "loss_breakdown/lm_loss": 1.3602742001239676e-05, "loss_breakdown/pointer_loss": 0.6280513405799866, "step": 5450 }, { "epoch": 0.5557609208295624, "loss_breakdown/lm_loss": 1.1220677151868585e-05, "loss_breakdown/pointer_loss": 0.26947730779647827, "step": 5450 }, { "epoch": 0.5557609208295624, "loss_breakdown/lm_loss": 1.1221491149626672e-05, "loss_breakdown/pointer_loss": 0.3898421823978424, "step": 5450 }, { "epoch": 0.5557609208295624, "loss_breakdown/lm_loss": 1.5143871678446885e-05, "loss_breakdown/pointer_loss": 0.49478858709335327, "step": 5450 }, { "epoch": 0.5557609208295624, "loss_breakdown/lm_loss": 1.19413334687124e-05, "loss_breakdown/pointer_loss": 0.5887372493743896, "step": 5450 }, { "epoch": 0.5557609208295624, "loss_breakdown/lm_loss": 1.4323266441351734e-05, "loss_breakdown/pointer_loss": 0.4728865623474121, "step": 5450 }, { "epoch": 0.556780665638424, "grad_norm": 3.3327289577402333, "learning_rate": 2.4628895184135978e-06, "loss": 0.2524, "step": 5460 }, { "epoch": 0.556780665638424, "loss_breakdown/lm_loss": 7.53522499508108e-06, "loss_breakdown/pointer_loss": 0.41633036732673645, "step": 5460 }, { "epoch": 0.556780665638424, "loss_breakdown/lm_loss": 9.880004654405639e-06, "loss_breakdown/pointer_loss": 0.363943487405777, "step": 5460 }, { "epoch": 0.556780665638424, "loss_breakdown/lm_loss": 7.217631264211377e-06, "loss_breakdown/pointer_loss": 0.513786792755127, "step": 5460 }, { "epoch": 0.556780665638424, "loss_breakdown/lm_loss": 8.390195944230072e-06, "loss_breakdown/pointer_loss": 1.9922314882278442, "step": 5460 }, { "epoch": 0.556780665638424, "loss_breakdown/lm_loss": 6.703456165269017e-06, "loss_breakdown/pointer_loss": 0.16449111700057983, "step": 5460 }, { "epoch": 0.556780665638424, "loss_breakdown/lm_loss": 1.1696043657138944e-05, "loss_breakdown/pointer_loss": 1.2629894018173218, "step": 5460 }, { "epoch": 0.556780665638424, "loss_breakdown/lm_loss": 6.739222044416238e-06, "loss_breakdown/pointer_loss": 0.08763933181762695, "step": 5460 }, { "epoch": 0.556780665638424, "loss_breakdown/lm_loss": 7.3643764153530356e-06, "loss_breakdown/pointer_loss": 0.3523924946784973, "step": 5460 }, { "epoch": 0.5578004104472856, "grad_norm": 3.3341315052484632, "learning_rate": 2.4572237960339947e-06, "loss": 0.2837, "step": 5470 }, { "epoch": 0.5578004104472856, "loss_breakdown/lm_loss": 1.0798166840686463e-05, "loss_breakdown/pointer_loss": 0.36139196157455444, "step": 5470 }, { "epoch": 0.5578004104472856, "loss_breakdown/lm_loss": 1.2984545719518792e-05, "loss_breakdown/pointer_loss": 0.1784456968307495, "step": 5470 }, { "epoch": 0.5578004104472856, "loss_breakdown/lm_loss": 6.575806764885783e-06, "loss_breakdown/pointer_loss": 0.6888617277145386, "step": 5470 }, { "epoch": 0.5578004104472856, "loss_breakdown/lm_loss": 7.1971503530221526e-06, "loss_breakdown/pointer_loss": 0.2925705909729004, "step": 5470 }, { "epoch": 0.5578004104472856, "loss_breakdown/lm_loss": 8.563037226849701e-06, "loss_breakdown/pointer_loss": 0.8925994038581848, "step": 5470 }, { "epoch": 0.5578004104472856, "loss_breakdown/lm_loss": 8.52309040055843e-06, "loss_breakdown/pointer_loss": 0.5835518836975098, "step": 5470 }, { "epoch": 0.5578004104472856, "loss_breakdown/lm_loss": 1.0482121979293879e-05, "loss_breakdown/pointer_loss": 0.25683754682540894, "step": 5470 }, { "epoch": 0.5578004104472856, "loss_breakdown/lm_loss": 1.008094386634184e-05, "loss_breakdown/pointer_loss": 0.527398407459259, "step": 5470 }, { "epoch": 0.5588201552561471, "grad_norm": 23.266630743229868, "learning_rate": 2.451558073654391e-06, "loss": 0.2679, "step": 5480 }, { "epoch": 0.5588201552561471, "loss_breakdown/lm_loss": 7.855777766963001e-06, "loss_breakdown/pointer_loss": 0.39722704887390137, "step": 5480 }, { "epoch": 0.5588201552561471, "loss_breakdown/lm_loss": 7.653134161955677e-06, "loss_breakdown/pointer_loss": 0.4630778729915619, "step": 5480 }, { "epoch": 0.5588201552561471, "loss_breakdown/lm_loss": 8.40806751511991e-06, "loss_breakdown/pointer_loss": 0.22450807690620422, "step": 5480 }, { "epoch": 0.5588201552561471, "loss_breakdown/lm_loss": 9.78688240138581e-06, "loss_breakdown/pointer_loss": 0.6579372882843018, "step": 5480 }, { "epoch": 0.5588201552561471, "loss_breakdown/lm_loss": 9.63585807767231e-06, "loss_breakdown/pointer_loss": 2.355651378631592, "step": 5480 }, { "epoch": 0.5588201552561471, "loss_breakdown/lm_loss": 8.590863217250444e-06, "loss_breakdown/pointer_loss": 0.052639029920101166, "step": 5480 }, { "epoch": 0.5588201552561471, "loss_breakdown/lm_loss": 7.092854957591044e-06, "loss_breakdown/pointer_loss": 0.09990657866001129, "step": 5480 }, { "epoch": 0.5588201552561471, "loss_breakdown/lm_loss": 1.4447799003391992e-05, "loss_breakdown/pointer_loss": 0.19258838891983032, "step": 5480 }, { "epoch": 0.5598399000650087, "grad_norm": 2.5609861217404988, "learning_rate": 2.4458923512747877e-06, "loss": 0.2925, "step": 5490 }, { "epoch": 0.5598399000650087, "loss_breakdown/lm_loss": 6.4843361542443745e-06, "loss_breakdown/pointer_loss": 0.5752092599868774, "step": 5490 }, { "epoch": 0.5598399000650087, "loss_breakdown/lm_loss": 8.964370863395743e-06, "loss_breakdown/pointer_loss": 0.22107216715812683, "step": 5490 }, { "epoch": 0.5598399000650087, "loss_breakdown/lm_loss": 9.958386726793833e-06, "loss_breakdown/pointer_loss": 0.23257382214069366, "step": 5490 }, { "epoch": 0.5598399000650087, "loss_breakdown/lm_loss": 1.5257608538377099e-05, "loss_breakdown/pointer_loss": 1.5080581903457642, "step": 5490 }, { "epoch": 0.5598399000650087, "loss_breakdown/lm_loss": 8.114573574857786e-06, "loss_breakdown/pointer_loss": 0.13465656340122223, "step": 5490 }, { "epoch": 0.5598399000650087, "loss_breakdown/lm_loss": 1.1848365829791874e-05, "loss_breakdown/pointer_loss": 0.34118878841400146, "step": 5490 }, { "epoch": 0.5598399000650087, "loss_breakdown/lm_loss": 6.313252015388571e-06, "loss_breakdown/pointer_loss": 0.1372183859348297, "step": 5490 }, { "epoch": 0.5598399000650087, "loss_breakdown/lm_loss": 1.1002300198015291e-05, "loss_breakdown/pointer_loss": 0.5731834173202515, "step": 5490 }, { "epoch": 0.5608596448738703, "grad_norm": 12.05999457436201, "learning_rate": 2.440226628895184e-06, "loss": 0.2687, "step": 5500 }, { "epoch": 0.5608596448738703, "loss_breakdown/lm_loss": 3.7196143239270896e-05, "loss_breakdown/pointer_loss": 0.9763890504837036, "step": 5500 }, { "epoch": 0.5608596448738703, "loss_breakdown/lm_loss": 1.718823659757618e-05, "loss_breakdown/pointer_loss": 0.2546728253364563, "step": 5500 }, { "epoch": 0.5608596448738703, "loss_breakdown/lm_loss": 1.83594547706889e-05, "loss_breakdown/pointer_loss": 0.5271579027175903, "step": 5500 }, { "epoch": 0.5608596448738703, "loss_breakdown/lm_loss": 9.976073670259211e-06, "loss_breakdown/pointer_loss": 0.3832268714904785, "step": 5500 }, { "epoch": 0.5608596448738703, "loss_breakdown/lm_loss": 1.3420000868791249e-05, "loss_breakdown/pointer_loss": 0.5956710577011108, "step": 5500 }, { "epoch": 0.5608596448738703, "loss_breakdown/lm_loss": 1.1314262337691616e-05, "loss_breakdown/pointer_loss": 0.35279127955436707, "step": 5500 }, { "epoch": 0.5608596448738703, "loss_breakdown/lm_loss": 8.389985850953963e-06, "loss_breakdown/pointer_loss": 1.2845427989959717, "step": 5500 }, { "epoch": 0.5608596448738703, "loss_breakdown/lm_loss": 1.261998841073364e-05, "loss_breakdown/pointer_loss": 0.9574741125106812, "step": 5500 }, { "epoch": 0.5618793896827319, "grad_norm": 4.134441171659845, "learning_rate": 2.4345609065155807e-06, "loss": 0.2739, "step": 5510 }, { "epoch": 0.5618793896827319, "loss_breakdown/lm_loss": 5.466357833938673e-06, "loss_breakdown/pointer_loss": 0.1731395572423935, "step": 5510 }, { "epoch": 0.5618793896827319, "loss_breakdown/lm_loss": 6.069004939490696e-06, "loss_breakdown/pointer_loss": 0.31313952803611755, "step": 5510 }, { "epoch": 0.5618793896827319, "loss_breakdown/lm_loss": 1.0875633961404674e-05, "loss_breakdown/pointer_loss": 0.24336901307106018, "step": 5510 }, { "epoch": 0.5618793896827319, "loss_breakdown/lm_loss": 1.134843478212133e-05, "loss_breakdown/pointer_loss": 2.3753182888031006, "step": 5510 }, { "epoch": 0.5618793896827319, "loss_breakdown/lm_loss": 1.329112819803413e-05, "loss_breakdown/pointer_loss": 0.572106659412384, "step": 5510 }, { "epoch": 0.5618793896827319, "loss_breakdown/lm_loss": 1.3831232536176685e-05, "loss_breakdown/pointer_loss": 0.4588177502155304, "step": 5510 }, { "epoch": 0.5618793896827319, "loss_breakdown/lm_loss": 9.373698048875667e-06, "loss_breakdown/pointer_loss": 2.079650640487671, "step": 5510 }, { "epoch": 0.5618793896827319, "loss_breakdown/lm_loss": 7.251806437125197e-06, "loss_breakdown/pointer_loss": 0.20764675736427307, "step": 5510 }, { "epoch": 0.5628991344915935, "grad_norm": 3.1144115895542805, "learning_rate": 2.4288951841359776e-06, "loss": 0.2987, "step": 5520 }, { "epoch": 0.5628991344915935, "loss_breakdown/lm_loss": 7.495056252082577e-06, "loss_breakdown/pointer_loss": 0.276274710893631, "step": 5520 }, { "epoch": 0.5628991344915935, "loss_breakdown/lm_loss": 7.248729616549099e-06, "loss_breakdown/pointer_loss": 0.4511571526527405, "step": 5520 }, { "epoch": 0.5628991344915935, "loss_breakdown/lm_loss": 7.510710474889493e-06, "loss_breakdown/pointer_loss": 0.6492196917533875, "step": 5520 }, { "epoch": 0.5628991344915935, "loss_breakdown/lm_loss": 7.190463747974718e-06, "loss_breakdown/pointer_loss": 0.3474803566932678, "step": 5520 }, { "epoch": 0.5628991344915935, "loss_breakdown/lm_loss": 6.665933597105322e-06, "loss_breakdown/pointer_loss": 0.24972590804100037, "step": 5520 }, { "epoch": 0.5628991344915935, "loss_breakdown/lm_loss": 8.172118214133661e-06, "loss_breakdown/pointer_loss": 0.18718774616718292, "step": 5520 }, { "epoch": 0.5628991344915935, "loss_breakdown/lm_loss": 8.571411854063626e-06, "loss_breakdown/pointer_loss": 0.8951919674873352, "step": 5520 }, { "epoch": 0.5628991344915935, "loss_breakdown/lm_loss": 5.693256298400229e-06, "loss_breakdown/pointer_loss": 0.5076097249984741, "step": 5520 }, { "epoch": 0.563918879300455, "grad_norm": 5.808551075637717, "learning_rate": 2.423229461756374e-06, "loss": 0.2473, "step": 5530 }, { "epoch": 0.563918879300455, "loss_breakdown/lm_loss": 8.714047908142675e-06, "loss_breakdown/pointer_loss": 0.3468773066997528, "step": 5530 }, { "epoch": 0.563918879300455, "loss_breakdown/lm_loss": 8.841118869895581e-06, "loss_breakdown/pointer_loss": 0.08525942265987396, "step": 5530 }, { "epoch": 0.563918879300455, "loss_breakdown/lm_loss": 7.176309281931026e-06, "loss_breakdown/pointer_loss": 0.49463194608688354, "step": 5530 }, { "epoch": 0.563918879300455, "loss_breakdown/lm_loss": 1.3088857485854533e-05, "loss_breakdown/pointer_loss": 0.23870894312858582, "step": 5530 }, { "epoch": 0.563918879300455, "loss_breakdown/lm_loss": 9.755064638738986e-06, "loss_breakdown/pointer_loss": 0.17543596029281616, "step": 5530 }, { "epoch": 0.563918879300455, "loss_breakdown/lm_loss": 6.882262823637575e-06, "loss_breakdown/pointer_loss": 0.7364938855171204, "step": 5530 }, { "epoch": 0.563918879300455, "loss_breakdown/lm_loss": 1.248087573912926e-05, "loss_breakdown/pointer_loss": 0.12382447719573975, "step": 5530 }, { "epoch": 0.563918879300455, "loss_breakdown/lm_loss": 9.242551641364116e-06, "loss_breakdown/pointer_loss": 0.21214669942855835, "step": 5530 }, { "epoch": 0.5649386241093166, "grad_norm": 2.5527158750564234, "learning_rate": 2.4175637393767706e-06, "loss": 0.278, "step": 5540 }, { "epoch": 0.5649386241093166, "loss_breakdown/lm_loss": 7.040547643555328e-06, "loss_breakdown/pointer_loss": 0.3927536606788635, "step": 5540 }, { "epoch": 0.5649386241093166, "loss_breakdown/lm_loss": 7.20344360161107e-06, "loss_breakdown/pointer_loss": 0.22923240065574646, "step": 5540 }, { "epoch": 0.5649386241093166, "loss_breakdown/lm_loss": 1.062040428223554e-05, "loss_breakdown/pointer_loss": 0.10615313053131104, "step": 5540 }, { "epoch": 0.5649386241093166, "loss_breakdown/lm_loss": 8.301064553961623e-06, "loss_breakdown/pointer_loss": 0.604722261428833, "step": 5540 }, { "epoch": 0.5649386241093166, "loss_breakdown/lm_loss": 7.044195172056789e-06, "loss_breakdown/pointer_loss": 0.16048991680145264, "step": 5540 }, { "epoch": 0.5649386241093166, "loss_breakdown/lm_loss": 6.983584626141237e-06, "loss_breakdown/pointer_loss": 0.28329527378082275, "step": 5540 }, { "epoch": 0.5649386241093166, "loss_breakdown/lm_loss": 8.876979336491786e-06, "loss_breakdown/pointer_loss": 0.2869676947593689, "step": 5540 }, { "epoch": 0.5649386241093166, "loss_breakdown/lm_loss": 8.341867214767262e-06, "loss_breakdown/pointer_loss": 0.10910052061080933, "step": 5540 }, { "epoch": 0.5659583689181782, "grad_norm": 4.364274494688475, "learning_rate": 2.411898016997167e-06, "loss": 0.2872, "step": 5550 }, { "epoch": 0.5659583689181782, "loss_breakdown/lm_loss": 2.6703981347964145e-05, "loss_breakdown/pointer_loss": 2.1148905754089355, "step": 5550 }, { "epoch": 0.5659583689181782, "loss_breakdown/lm_loss": 1.2283710020710714e-05, "loss_breakdown/pointer_loss": 1.5306591987609863, "step": 5550 }, { "epoch": 0.5659583689181782, "loss_breakdown/lm_loss": 1.3185343050281517e-05, "loss_breakdown/pointer_loss": 0.8651050329208374, "step": 5550 }, { "epoch": 0.5659583689181782, "loss_breakdown/lm_loss": 1.0893211765505839e-05, "loss_breakdown/pointer_loss": 1.4877660274505615, "step": 5550 }, { "epoch": 0.5659583689181782, "loss_breakdown/lm_loss": 8.9909217422246e-06, "loss_breakdown/pointer_loss": 0.7724220752716064, "step": 5550 }, { "epoch": 0.5659583689181782, "loss_breakdown/lm_loss": 8.96210713108303e-06, "loss_breakdown/pointer_loss": 0.9916563630104065, "step": 5550 }, { "epoch": 0.5659583689181782, "loss_breakdown/lm_loss": 1.1747144526452757e-05, "loss_breakdown/pointer_loss": 0.41975611448287964, "step": 5550 }, { "epoch": 0.5659583689181782, "loss_breakdown/lm_loss": 7.3170135692635085e-06, "loss_breakdown/pointer_loss": 0.26613324880599976, "step": 5550 }, { "epoch": 0.5669781137270398, "grad_norm": 3.793890324278223, "learning_rate": 2.406232294617564e-06, "loss": 0.2731, "step": 5560 }, { "epoch": 0.5669781137270398, "loss_breakdown/lm_loss": 6.955785465834197e-06, "loss_breakdown/pointer_loss": 0.11686909198760986, "step": 5560 }, { "epoch": 0.5669781137270398, "loss_breakdown/lm_loss": 7.171016932261409e-06, "loss_breakdown/pointer_loss": 0.5388299226760864, "step": 5560 }, { "epoch": 0.5669781137270398, "loss_breakdown/lm_loss": 1.3744556781603023e-05, "loss_breakdown/pointer_loss": 0.11032896488904953, "step": 5560 }, { "epoch": 0.5669781137270398, "loss_breakdown/lm_loss": 6.837226464995183e-06, "loss_breakdown/pointer_loss": 0.5213960409164429, "step": 5560 }, { "epoch": 0.5669781137270398, "loss_breakdown/lm_loss": 7.69684265833348e-06, "loss_breakdown/pointer_loss": 0.30830129981040955, "step": 5560 }, { "epoch": 0.5669781137270398, "loss_breakdown/lm_loss": 8.538391739421058e-06, "loss_breakdown/pointer_loss": 0.07709028571844101, "step": 5560 }, { "epoch": 0.5669781137270398, "loss_breakdown/lm_loss": 8.500784133502748e-06, "loss_breakdown/pointer_loss": 0.4142836332321167, "step": 5560 }, { "epoch": 0.5669781137270398, "loss_breakdown/lm_loss": 7.666380042792298e-06, "loss_breakdown/pointer_loss": 3.254307508468628, "step": 5560 }, { "epoch": 0.5679978585359013, "grad_norm": 4.154162684286955, "learning_rate": 2.4005665722379606e-06, "loss": 0.2938, "step": 5570 }, { "epoch": 0.5679978585359013, "loss_breakdown/lm_loss": 8.428919500147458e-06, "loss_breakdown/pointer_loss": 0.1386834979057312, "step": 5570 }, { "epoch": 0.5679978585359013, "loss_breakdown/lm_loss": 7.115692824299913e-06, "loss_breakdown/pointer_loss": 0.47755417227745056, "step": 5570 }, { "epoch": 0.5679978585359013, "loss_breakdown/lm_loss": 7.510081104555866e-06, "loss_breakdown/pointer_loss": 0.49793097376823425, "step": 5570 }, { "epoch": 0.5679978585359013, "loss_breakdown/lm_loss": 7.179044132499257e-06, "loss_breakdown/pointer_loss": 0.5243765115737915, "step": 5570 }, { "epoch": 0.5679978585359013, "loss_breakdown/lm_loss": 1.043810789269628e-05, "loss_breakdown/pointer_loss": 0.2508520781993866, "step": 5570 }, { "epoch": 0.5679978585359013, "loss_breakdown/lm_loss": 7.2120583354262635e-06, "loss_breakdown/pointer_loss": 0.7763301730155945, "step": 5570 }, { "epoch": 0.5679978585359013, "loss_breakdown/lm_loss": 7.5553784881776664e-06, "loss_breakdown/pointer_loss": 0.23330199718475342, "step": 5570 }, { "epoch": 0.5679978585359013, "loss_breakdown/lm_loss": 7.78460980654927e-06, "loss_breakdown/pointer_loss": 0.3616308569908142, "step": 5570 }, { "epoch": 0.5690176033447629, "grad_norm": 5.709366030161015, "learning_rate": 2.394900849858357e-06, "loss": 0.2539, "step": 5580 }, { "epoch": 0.5690176033447629, "loss_breakdown/lm_loss": 7.565719442936825e-06, "loss_breakdown/pointer_loss": 0.5641878843307495, "step": 5580 }, { "epoch": 0.5690176033447629, "loss_breakdown/lm_loss": 6.4690275394241326e-06, "loss_breakdown/pointer_loss": 0.382868230342865, "step": 5580 }, { "epoch": 0.5690176033447629, "loss_breakdown/lm_loss": 7.745831680949777e-06, "loss_breakdown/pointer_loss": 0.19438403844833374, "step": 5580 }, { "epoch": 0.5690176033447629, "loss_breakdown/lm_loss": 7.4028066592291e-06, "loss_breakdown/pointer_loss": 4.403062343597412, "step": 5580 }, { "epoch": 0.5690176033447629, "loss_breakdown/lm_loss": 8.539233022020198e-06, "loss_breakdown/pointer_loss": 0.6824109554290771, "step": 5580 }, { "epoch": 0.5690176033447629, "loss_breakdown/lm_loss": 7.740556611679494e-06, "loss_breakdown/pointer_loss": 0.23741376399993896, "step": 5580 }, { "epoch": 0.5690176033447629, "loss_breakdown/lm_loss": 8.439888915745541e-06, "loss_breakdown/pointer_loss": 0.22635261714458466, "step": 5580 }, { "epoch": 0.5690176033447629, "loss_breakdown/lm_loss": 7.470334367098985e-06, "loss_breakdown/pointer_loss": 0.07112927734851837, "step": 5580 }, { "epoch": 0.5700373481536245, "grad_norm": 13.716501700252605, "learning_rate": 2.3892351274787536e-06, "loss": 0.2891, "step": 5590 }, { "epoch": 0.5700373481536245, "loss_breakdown/lm_loss": 6.039874733687611e-06, "loss_breakdown/pointer_loss": 0.6689626574516296, "step": 5590 }, { "epoch": 0.5700373481536245, "loss_breakdown/lm_loss": 7.846128937671892e-06, "loss_breakdown/pointer_loss": 0.3297961950302124, "step": 5590 }, { "epoch": 0.5700373481536245, "loss_breakdown/lm_loss": 7.053122317302041e-06, "loss_breakdown/pointer_loss": 0.18720580637454987, "step": 5590 }, { "epoch": 0.5700373481536245, "loss_breakdown/lm_loss": 6.7968348957947455e-06, "loss_breakdown/pointer_loss": 0.10323488712310791, "step": 5590 }, { "epoch": 0.5700373481536245, "loss_breakdown/lm_loss": 6.8765889409405645e-06, "loss_breakdown/pointer_loss": 0.5902898907661438, "step": 5590 }, { "epoch": 0.5700373481536245, "loss_breakdown/lm_loss": 6.97957466400112e-06, "loss_breakdown/pointer_loss": 0.14191776514053345, "step": 5590 }, { "epoch": 0.5700373481536245, "loss_breakdown/lm_loss": 5.91073376199347e-06, "loss_breakdown/pointer_loss": 0.08193973451852798, "step": 5590 }, { "epoch": 0.5700373481536245, "loss_breakdown/lm_loss": 7.851812370063271e-06, "loss_breakdown/pointer_loss": 0.1821950078010559, "step": 5590 }, { "epoch": 0.5710570929624862, "grad_norm": 17.118124864732998, "learning_rate": 2.3835694050991505e-06, "loss": 0.255, "step": 5600 }, { "epoch": 0.5710570929624862, "loss_breakdown/lm_loss": 1.727752351143863e-05, "loss_breakdown/pointer_loss": 1.5050510168075562, "step": 5600 }, { "epoch": 0.5710570929624862, "loss_breakdown/lm_loss": 8.859363333613146e-06, "loss_breakdown/pointer_loss": 0.41562700271606445, "step": 5600 }, { "epoch": 0.5710570929624862, "loss_breakdown/lm_loss": 1.3896753443987109e-05, "loss_breakdown/pointer_loss": 1.6011866331100464, "step": 5600 }, { "epoch": 0.5710570929624862, "loss_breakdown/lm_loss": 8.495313522871584e-06, "loss_breakdown/pointer_loss": 0.23503276705741882, "step": 5600 }, { "epoch": 0.5710570929624862, "loss_breakdown/lm_loss": 7.307886335183866e-06, "loss_breakdown/pointer_loss": 0.39012712240219116, "step": 5600 }, { "epoch": 0.5710570929624862, "loss_breakdown/lm_loss": 9.364443030790426e-06, "loss_breakdown/pointer_loss": 0.860397219657898, "step": 5600 }, { "epoch": 0.5710570929624862, "loss_breakdown/lm_loss": 8.20147124613868e-06, "loss_breakdown/pointer_loss": 0.5088979005813599, "step": 5600 }, { "epoch": 0.5710570929624862, "loss_breakdown/lm_loss": 6.353241587930825e-06, "loss_breakdown/pointer_loss": 0.1388849914073944, "step": 5600 }, { "epoch": 0.5720768377713478, "grad_norm": 4.196860032108031, "learning_rate": 2.377903682719547e-06, "loss": 0.2729, "step": 5610 }, { "epoch": 0.5720768377713478, "loss_breakdown/lm_loss": 5.922428954363568e-06, "loss_breakdown/pointer_loss": 0.26933595538139343, "step": 5610 }, { "epoch": 0.5720768377713478, "loss_breakdown/lm_loss": 8.08756612968864e-06, "loss_breakdown/pointer_loss": 0.45374590158462524, "step": 5610 }, { "epoch": 0.5720768377713478, "loss_breakdown/lm_loss": 6.944492724869633e-06, "loss_breakdown/pointer_loss": 0.4048234224319458, "step": 5610 }, { "epoch": 0.5720768377713478, "loss_breakdown/lm_loss": 7.929003913886845e-06, "loss_breakdown/pointer_loss": 0.41333550214767456, "step": 5610 }, { "epoch": 0.5720768377713478, "loss_breakdown/lm_loss": 7.343198831222253e-06, "loss_breakdown/pointer_loss": 0.1003931388258934, "step": 5610 }, { "epoch": 0.5720768377713478, "loss_breakdown/lm_loss": 6.058421604393516e-06, "loss_breakdown/pointer_loss": 0.516251266002655, "step": 5610 }, { "epoch": 0.5720768377713478, "loss_breakdown/lm_loss": 5.670333393936744e-06, "loss_breakdown/pointer_loss": 0.1745525300502777, "step": 5610 }, { "epoch": 0.5720768377713478, "loss_breakdown/lm_loss": 7.148505119403126e-06, "loss_breakdown/pointer_loss": 0.28249526023864746, "step": 5610 }, { "epoch": 0.5730965825802093, "grad_norm": 3.8122253961230226, "learning_rate": 2.3722379603399435e-06, "loss": 0.2758, "step": 5620 }, { "epoch": 0.5730965825802093, "loss_breakdown/lm_loss": 7.043453024380142e-06, "loss_breakdown/pointer_loss": 0.38917505741119385, "step": 5620 }, { "epoch": 0.5730965825802093, "loss_breakdown/lm_loss": 7.1694898906571325e-06, "loss_breakdown/pointer_loss": 1.0802274942398071, "step": 5620 }, { "epoch": 0.5730965825802093, "loss_breakdown/lm_loss": 6.71846783006913e-06, "loss_breakdown/pointer_loss": 0.8679790496826172, "step": 5620 }, { "epoch": 0.5730965825802093, "loss_breakdown/lm_loss": 5.955234883003868e-06, "loss_breakdown/pointer_loss": 0.33863377571105957, "step": 5620 }, { "epoch": 0.5730965825802093, "loss_breakdown/lm_loss": 6.442122867156286e-06, "loss_breakdown/pointer_loss": 0.5073358416557312, "step": 5620 }, { "epoch": 0.5730965825802093, "loss_breakdown/lm_loss": 7.85704378358787e-06, "loss_breakdown/pointer_loss": 0.3077837824821472, "step": 5620 }, { "epoch": 0.5730965825802093, "loss_breakdown/lm_loss": 8.602379239164293e-06, "loss_breakdown/pointer_loss": 0.315586119890213, "step": 5620 }, { "epoch": 0.5730965825802093, "loss_breakdown/lm_loss": 8.687634363013785e-06, "loss_breakdown/pointer_loss": 0.34244680404663086, "step": 5620 }, { "epoch": 0.5741163273890709, "grad_norm": 4.554980554914736, "learning_rate": 2.36657223796034e-06, "loss": 0.2643, "step": 5630 }, { "epoch": 0.5741163273890709, "loss_breakdown/lm_loss": 7.378897407761542e-06, "loss_breakdown/pointer_loss": 0.2331857681274414, "step": 5630 }, { "epoch": 0.5741163273890709, "loss_breakdown/lm_loss": 7.26770076653338e-06, "loss_breakdown/pointer_loss": 0.5099970102310181, "step": 5630 }, { "epoch": 0.5741163273890709, "loss_breakdown/lm_loss": 2.368739660596475e-05, "loss_breakdown/pointer_loss": 0.10829468071460724, "step": 5630 }, { "epoch": 0.5741163273890709, "loss_breakdown/lm_loss": 8.04252613306744e-06, "loss_breakdown/pointer_loss": 0.11398465931415558, "step": 5630 }, { "epoch": 0.5741163273890709, "loss_breakdown/lm_loss": 1.7184212993015535e-05, "loss_breakdown/pointer_loss": 0.04964494705200195, "step": 5630 }, { "epoch": 0.5741163273890709, "loss_breakdown/lm_loss": 8.106086170300841e-06, "loss_breakdown/pointer_loss": 0.18143074214458466, "step": 5630 }, { "epoch": 0.5741163273890709, "loss_breakdown/lm_loss": 8.777643415669445e-06, "loss_breakdown/pointer_loss": 0.373535692691803, "step": 5630 }, { "epoch": 0.5741163273890709, "loss_breakdown/lm_loss": 1.03828178907861e-05, "loss_breakdown/pointer_loss": 0.23015277087688446, "step": 5630 }, { "epoch": 0.5751360721979325, "grad_norm": 3.6932620130404903, "learning_rate": 2.360906515580737e-06, "loss": 0.2809, "step": 5640 }, { "epoch": 0.5751360721979325, "loss_breakdown/lm_loss": 7.544741492893081e-06, "loss_breakdown/pointer_loss": 0.16174079477787018, "step": 5640 }, { "epoch": 0.5751360721979325, "loss_breakdown/lm_loss": 9.813437827688176e-06, "loss_breakdown/pointer_loss": 0.9913824796676636, "step": 5640 }, { "epoch": 0.5751360721979325, "loss_breakdown/lm_loss": 6.851224043202819e-06, "loss_breakdown/pointer_loss": 0.4489319324493408, "step": 5640 }, { "epoch": 0.5751360721979325, "loss_breakdown/lm_loss": 8.440858437097631e-06, "loss_breakdown/pointer_loss": 0.9033418893814087, "step": 5640 }, { "epoch": 0.5751360721979325, "loss_breakdown/lm_loss": 7.706571523158345e-06, "loss_breakdown/pointer_loss": 0.3385017216205597, "step": 5640 }, { "epoch": 0.5751360721979325, "loss_breakdown/lm_loss": 1.0248244507238269e-05, "loss_breakdown/pointer_loss": 0.3799170255661011, "step": 5640 }, { "epoch": 0.5751360721979325, "loss_breakdown/lm_loss": 1.0642042980180122e-05, "loss_breakdown/pointer_loss": 0.3069743812084198, "step": 5640 }, { "epoch": 0.5751360721979325, "loss_breakdown/lm_loss": 8.665298992127646e-06, "loss_breakdown/pointer_loss": 0.10585173964500427, "step": 5640 }, { "epoch": 0.5761558170067941, "grad_norm": 8.819935001166943, "learning_rate": 2.3552407932011335e-06, "loss": 0.2656, "step": 5650 }, { "epoch": 0.5761558170067941, "loss_breakdown/lm_loss": 4.449240077519789e-05, "loss_breakdown/pointer_loss": 1.9630354642868042, "step": 5650 }, { "epoch": 0.5761558170067941, "loss_breakdown/lm_loss": 1.7079157260013744e-05, "loss_breakdown/pointer_loss": 0.8203860521316528, "step": 5650 }, { "epoch": 0.5761558170067941, "loss_breakdown/lm_loss": 1.4809069398324937e-05, "loss_breakdown/pointer_loss": 0.4636872708797455, "step": 5650 }, { "epoch": 0.5761558170067941, "loss_breakdown/lm_loss": 1.141192205977859e-05, "loss_breakdown/pointer_loss": 1.0646612644195557, "step": 5650 }, { "epoch": 0.5761558170067941, "loss_breakdown/lm_loss": 1.8187907699029893e-05, "loss_breakdown/pointer_loss": 1.1193913221359253, "step": 5650 }, { "epoch": 0.5761558170067941, "loss_breakdown/lm_loss": 3.6017296224599704e-05, "loss_breakdown/pointer_loss": 0.38338589668273926, "step": 5650 }, { "epoch": 0.5761558170067941, "loss_breakdown/lm_loss": 1.332116335106548e-05, "loss_breakdown/pointer_loss": 0.7383629083633423, "step": 5650 }, { "epoch": 0.5761558170067941, "loss_breakdown/lm_loss": 1.0121750165126286e-05, "loss_breakdown/pointer_loss": 1.0791826248168945, "step": 5650 }, { "epoch": 0.5771755618156557, "grad_norm": 3.25601433962345, "learning_rate": 2.34957507082153e-06, "loss": 0.2595, "step": 5660 }, { "epoch": 0.5771755618156557, "loss_breakdown/lm_loss": 8.888796401151922e-06, "loss_breakdown/pointer_loss": 0.2038707286119461, "step": 5660 }, { "epoch": 0.5771755618156557, "loss_breakdown/lm_loss": 8.750682354730088e-06, "loss_breakdown/pointer_loss": 0.21359571814537048, "step": 5660 }, { "epoch": 0.5771755618156557, "loss_breakdown/lm_loss": 9.623866390029434e-06, "loss_breakdown/pointer_loss": 0.32306498289108276, "step": 5660 }, { "epoch": 0.5771755618156557, "loss_breakdown/lm_loss": 6.998825938353548e-06, "loss_breakdown/pointer_loss": 0.6036400198936462, "step": 5660 }, { "epoch": 0.5771755618156557, "loss_breakdown/lm_loss": 5.907042577746324e-06, "loss_breakdown/pointer_loss": 0.19841551780700684, "step": 5660 }, { "epoch": 0.5771755618156557, "loss_breakdown/lm_loss": 7.234201802930329e-06, "loss_breakdown/pointer_loss": 0.2493285834789276, "step": 5660 }, { "epoch": 0.5771755618156557, "loss_breakdown/lm_loss": 6.989544090174604e-06, "loss_breakdown/pointer_loss": 0.16798926889896393, "step": 5660 }, { "epoch": 0.5771755618156557, "loss_breakdown/lm_loss": 1.4324053154268768e-05, "loss_breakdown/pointer_loss": 1.0899673700332642, "step": 5660 }, { "epoch": 0.5781953066245172, "grad_norm": 2.7404713826378635, "learning_rate": 2.3439093484419265e-06, "loss": 0.2775, "step": 5670 }, { "epoch": 0.5781953066245172, "loss_breakdown/lm_loss": 8.209030056605116e-06, "loss_breakdown/pointer_loss": 0.29180002212524414, "step": 5670 }, { "epoch": 0.5781953066245172, "loss_breakdown/lm_loss": 6.476193902926752e-06, "loss_breakdown/pointer_loss": 0.18954038619995117, "step": 5670 }, { "epoch": 0.5781953066245172, "loss_breakdown/lm_loss": 9.010119356389623e-06, "loss_breakdown/pointer_loss": 0.5782662034034729, "step": 5670 }, { "epoch": 0.5781953066245172, "loss_breakdown/lm_loss": 4.037367398268543e-05, "loss_breakdown/pointer_loss": 0.4595649540424347, "step": 5670 }, { "epoch": 0.5781953066245172, "loss_breakdown/lm_loss": 7.1995859798335005e-06, "loss_breakdown/pointer_loss": 0.7744015455245972, "step": 5670 }, { "epoch": 0.5781953066245172, "loss_breakdown/lm_loss": 1.0068845767818857e-05, "loss_breakdown/pointer_loss": 0.6237236261367798, "step": 5670 }, { "epoch": 0.5781953066245172, "loss_breakdown/lm_loss": 6.643378583248705e-06, "loss_breakdown/pointer_loss": 0.31382036209106445, "step": 5670 }, { "epoch": 0.5781953066245172, "loss_breakdown/lm_loss": 8.853490726323798e-06, "loss_breakdown/pointer_loss": 1.239992618560791, "step": 5670 }, { "epoch": 0.5792150514333788, "grad_norm": 7.431589710563034, "learning_rate": 2.338243626062323e-06, "loss": 0.2778, "step": 5680 }, { "epoch": 0.5792150514333788, "loss_breakdown/lm_loss": 6.759100415365538e-06, "loss_breakdown/pointer_loss": 2.424457311630249, "step": 5680 }, { "epoch": 0.5792150514333788, "loss_breakdown/lm_loss": 5.8988189266528934e-05, "loss_breakdown/pointer_loss": 0.22843502461910248, "step": 5680 }, { "epoch": 0.5792150514333788, "loss_breakdown/lm_loss": 9.41735015658196e-06, "loss_breakdown/pointer_loss": 0.5061800479888916, "step": 5680 }, { "epoch": 0.5792150514333788, "loss_breakdown/lm_loss": 8.71800602908479e-06, "loss_breakdown/pointer_loss": 0.16093386709690094, "step": 5680 }, { "epoch": 0.5792150514333788, "loss_breakdown/lm_loss": 8.078303835645784e-06, "loss_breakdown/pointer_loss": 0.01763056218624115, "step": 5680 }, { "epoch": 0.5792150514333788, "loss_breakdown/lm_loss": 9.194841368298512e-06, "loss_breakdown/pointer_loss": 0.12944450974464417, "step": 5680 }, { "epoch": 0.5792150514333788, "loss_breakdown/lm_loss": 8.07827655080473e-06, "loss_breakdown/pointer_loss": 0.323772668838501, "step": 5680 }, { "epoch": 0.5792150514333788, "loss_breakdown/lm_loss": 1.661711939959787e-05, "loss_breakdown/pointer_loss": 0.6477792263031006, "step": 5680 }, { "epoch": 0.5802347962422404, "grad_norm": 2.398326659351558, "learning_rate": 2.33257790368272e-06, "loss": 0.2753, "step": 5690 }, { "epoch": 0.5802347962422404, "loss_breakdown/lm_loss": 9.78206026047701e-06, "loss_breakdown/pointer_loss": 0.1656983196735382, "step": 5690 }, { "epoch": 0.5802347962422404, "loss_breakdown/lm_loss": 6.399480753316311e-06, "loss_breakdown/pointer_loss": 0.1680224984884262, "step": 5690 }, { "epoch": 0.5802347962422404, "loss_breakdown/lm_loss": 5.494417109730421e-06, "loss_breakdown/pointer_loss": 0.0926423892378807, "step": 5690 }, { "epoch": 0.5802347962422404, "loss_breakdown/lm_loss": 8.267290468211286e-06, "loss_breakdown/pointer_loss": 0.11720681935548782, "step": 5690 }, { "epoch": 0.5802347962422404, "loss_breakdown/lm_loss": 1.7918486264534295e-05, "loss_breakdown/pointer_loss": 0.5204126238822937, "step": 5690 }, { "epoch": 0.5802347962422404, "loss_breakdown/lm_loss": 7.380645911325701e-06, "loss_breakdown/pointer_loss": 0.24276477098464966, "step": 5690 }, { "epoch": 0.5802347962422404, "loss_breakdown/lm_loss": 6.076917998143472e-06, "loss_breakdown/pointer_loss": 0.1324400007724762, "step": 5690 }, { "epoch": 0.5802347962422404, "loss_breakdown/lm_loss": 6.968676188989775e-06, "loss_breakdown/pointer_loss": 0.5231750011444092, "step": 5690 }, { "epoch": 0.581254541051102, "grad_norm": 8.990474036084663, "learning_rate": 2.3269121813031164e-06, "loss": 0.2715, "step": 5700 }, { "epoch": 0.581254541051102, "loss_breakdown/lm_loss": 5.767346374341287e-05, "loss_breakdown/pointer_loss": 1.8069802522659302, "step": 5700 }, { "epoch": 0.581254541051102, "loss_breakdown/lm_loss": 1.9796543710981496e-05, "loss_breakdown/pointer_loss": 0.6038047671318054, "step": 5700 }, { "epoch": 0.581254541051102, "loss_breakdown/lm_loss": 1.4949927390262019e-05, "loss_breakdown/pointer_loss": 0.539886474609375, "step": 5700 }, { "epoch": 0.581254541051102, "loss_breakdown/lm_loss": 1.4643781469203532e-05, "loss_breakdown/pointer_loss": 0.6085708737373352, "step": 5700 }, { "epoch": 0.581254541051102, "loss_breakdown/lm_loss": 1.1752825230360031e-05, "loss_breakdown/pointer_loss": 0.9098473787307739, "step": 5700 }, { "epoch": 0.581254541051102, "loss_breakdown/lm_loss": 1.112202153308317e-05, "loss_breakdown/pointer_loss": 0.5156306624412537, "step": 5700 }, { "epoch": 0.581254541051102, "loss_breakdown/lm_loss": 3.5133958590449765e-05, "loss_breakdown/pointer_loss": 0.666500985622406, "step": 5700 }, { "epoch": 0.581254541051102, "loss_breakdown/lm_loss": 1.0396805009804666e-05, "loss_breakdown/pointer_loss": 0.22498928010463715, "step": 5700 }, { "epoch": 0.5822742858599635, "grad_norm": 3.5445897738415866, "learning_rate": 2.321246458923513e-06, "loss": 0.2537, "step": 5710 }, { "epoch": 0.5822742858599635, "loss_breakdown/lm_loss": 9.749745004228316e-06, "loss_breakdown/pointer_loss": 0.15773408114910126, "step": 5710 }, { "epoch": 0.5822742858599635, "loss_breakdown/lm_loss": 5.647806574415881e-06, "loss_breakdown/pointer_loss": 0.1578923761844635, "step": 5710 }, { "epoch": 0.5822742858599635, "loss_breakdown/lm_loss": 1.3184531781007536e-05, "loss_breakdown/pointer_loss": 2.8614542484283447, "step": 5710 }, { "epoch": 0.5822742858599635, "loss_breakdown/lm_loss": 6.441181540139951e-06, "loss_breakdown/pointer_loss": 1.0147054195404053, "step": 5710 }, { "epoch": 0.5822742858599635, "loss_breakdown/lm_loss": 8.564383279008325e-06, "loss_breakdown/pointer_loss": 0.05107352137565613, "step": 5710 }, { "epoch": 0.5822742858599635, "loss_breakdown/lm_loss": 9.631922694097739e-06, "loss_breakdown/pointer_loss": 0.5608763694763184, "step": 5710 }, { "epoch": 0.5822742858599635, "loss_breakdown/lm_loss": 6.723331352986861e-06, "loss_breakdown/pointer_loss": 0.21717484295368195, "step": 5710 }, { "epoch": 0.5822742858599635, "loss_breakdown/lm_loss": 7.700811693212017e-06, "loss_breakdown/pointer_loss": 0.2924419343471527, "step": 5710 }, { "epoch": 0.5832940306688251, "grad_norm": 2.7384802262642944, "learning_rate": 2.3155807365439094e-06, "loss": 0.3093, "step": 5720 }, { "epoch": 0.5832940306688251, "loss_breakdown/lm_loss": 1.155123482021736e-05, "loss_breakdown/pointer_loss": 0.6172099113464355, "step": 5720 }, { "epoch": 0.5832940306688251, "loss_breakdown/lm_loss": 1.0175100214837585e-05, "loss_breakdown/pointer_loss": 0.844264566898346, "step": 5720 }, { "epoch": 0.5832940306688251, "loss_breakdown/lm_loss": 9.90991793514695e-06, "loss_breakdown/pointer_loss": 0.7775204181671143, "step": 5720 }, { "epoch": 0.5832940306688251, "loss_breakdown/lm_loss": 1.2410136150720064e-05, "loss_breakdown/pointer_loss": 0.7597507238388062, "step": 5720 }, { "epoch": 0.5832940306688251, "loss_breakdown/lm_loss": 8.962858373706695e-06, "loss_breakdown/pointer_loss": 0.8064263463020325, "step": 5720 }, { "epoch": 0.5832940306688251, "loss_breakdown/lm_loss": 8.237774636654649e-06, "loss_breakdown/pointer_loss": 0.2003396600484848, "step": 5720 }, { "epoch": 0.5832940306688251, "loss_breakdown/lm_loss": 6.944348115212051e-06, "loss_breakdown/pointer_loss": 0.4903694689273834, "step": 5720 }, { "epoch": 0.5832940306688251, "loss_breakdown/lm_loss": 7.059676590870367e-06, "loss_breakdown/pointer_loss": 0.3594498634338379, "step": 5720 }, { "epoch": 0.5843137754776867, "grad_norm": 5.4461330886076444, "learning_rate": 2.3099150141643063e-06, "loss": 0.2636, "step": 5730 }, { "epoch": 0.5843137754776867, "loss_breakdown/lm_loss": 8.006773896340746e-06, "loss_breakdown/pointer_loss": 0.3935728073120117, "step": 5730 }, { "epoch": 0.5843137754776867, "loss_breakdown/lm_loss": 7.239868864417076e-06, "loss_breakdown/pointer_loss": 0.2857552468776703, "step": 5730 }, { "epoch": 0.5843137754776867, "loss_breakdown/lm_loss": 7.287550033652224e-06, "loss_breakdown/pointer_loss": 0.0799376592040062, "step": 5730 }, { "epoch": 0.5843137754776867, "loss_breakdown/lm_loss": 6.9458246798603795e-06, "loss_breakdown/pointer_loss": 0.20925623178482056, "step": 5730 }, { "epoch": 0.5843137754776867, "loss_breakdown/lm_loss": 7.2994648689928e-06, "loss_breakdown/pointer_loss": 0.13380229473114014, "step": 5730 }, { "epoch": 0.5843137754776867, "loss_breakdown/lm_loss": 6.202787062647985e-06, "loss_breakdown/pointer_loss": 0.3735182583332062, "step": 5730 }, { "epoch": 0.5843137754776867, "loss_breakdown/lm_loss": 1.3795625818602275e-05, "loss_breakdown/pointer_loss": 0.19918189942836761, "step": 5730 }, { "epoch": 0.5843137754776867, "loss_breakdown/lm_loss": 7.1445087996835355e-06, "loss_breakdown/pointer_loss": 0.16787587106227875, "step": 5730 }, { "epoch": 0.5853335202865483, "grad_norm": 2.5703741172712, "learning_rate": 2.304249291784703e-06, "loss": 0.2926, "step": 5740 }, { "epoch": 0.5853335202865483, "loss_breakdown/lm_loss": 9.856204997049645e-06, "loss_breakdown/pointer_loss": 0.3035714626312256, "step": 5740 }, { "epoch": 0.5853335202865483, "loss_breakdown/lm_loss": 6.298770585999591e-06, "loss_breakdown/pointer_loss": 0.3011566698551178, "step": 5740 }, { "epoch": 0.5853335202865483, "loss_breakdown/lm_loss": 7.684083357162308e-06, "loss_breakdown/pointer_loss": 0.23591841757297516, "step": 5740 }, { "epoch": 0.5853335202865483, "loss_breakdown/lm_loss": 7.921274118416477e-06, "loss_breakdown/pointer_loss": 0.3971439003944397, "step": 5740 }, { "epoch": 0.5853335202865483, "loss_breakdown/lm_loss": 6.700208359688986e-06, "loss_breakdown/pointer_loss": 1.500020146369934, "step": 5740 }, { "epoch": 0.5853335202865483, "loss_breakdown/lm_loss": 1.0984927939716727e-05, "loss_breakdown/pointer_loss": 0.17203016579151154, "step": 5740 }, { "epoch": 0.5853335202865483, "loss_breakdown/lm_loss": 6.601837867492577e-06, "loss_breakdown/pointer_loss": 0.46691882610321045, "step": 5740 }, { "epoch": 0.5853335202865483, "loss_breakdown/lm_loss": 5.698143468180206e-06, "loss_breakdown/pointer_loss": 0.6922069787979126, "step": 5740 }, { "epoch": 0.5863532650954099, "grad_norm": 30.558223006235306, "learning_rate": 2.2985835694050993e-06, "loss": 0.2722, "step": 5750 }, { "epoch": 0.5863532650954099, "loss_breakdown/lm_loss": 1.9764769604080357e-05, "loss_breakdown/pointer_loss": 0.9038715362548828, "step": 5750 }, { "epoch": 0.5863532650954099, "loss_breakdown/lm_loss": 3.0412638807320036e-05, "loss_breakdown/pointer_loss": 0.6581292152404785, "step": 5750 }, { "epoch": 0.5863532650954099, "loss_breakdown/lm_loss": 1.3157409739505965e-05, "loss_breakdown/pointer_loss": 0.9823724031448364, "step": 5750 }, { "epoch": 0.5863532650954099, "loss_breakdown/lm_loss": 1.1594775060075335e-05, "loss_breakdown/pointer_loss": 0.6123813986778259, "step": 5750 }, { "epoch": 0.5863532650954099, "loss_breakdown/lm_loss": 2.5513812943245284e-05, "loss_breakdown/pointer_loss": 0.483451783657074, "step": 5750 }, { "epoch": 0.5863532650954099, "loss_breakdown/lm_loss": 1.3311703696672339e-05, "loss_breakdown/pointer_loss": 0.5766128897666931, "step": 5750 }, { "epoch": 0.5863532650954099, "loss_breakdown/lm_loss": 1.0906333955063019e-05, "loss_breakdown/pointer_loss": 0.34200841188430786, "step": 5750 }, { "epoch": 0.5863532650954099, "loss_breakdown/lm_loss": 8.682425686856732e-06, "loss_breakdown/pointer_loss": 0.2700371742248535, "step": 5750 }, { "epoch": 0.5873730099042714, "grad_norm": 7.79795932802364, "learning_rate": 2.292917847025496e-06, "loss": 0.2635, "step": 5760 }, { "epoch": 0.5873730099042714, "loss_breakdown/lm_loss": 7.5746520451502874e-06, "loss_breakdown/pointer_loss": 0.31590622663497925, "step": 5760 }, { "epoch": 0.5873730099042714, "loss_breakdown/lm_loss": 6.602530902455328e-06, "loss_breakdown/pointer_loss": 0.46803420782089233, "step": 5760 }, { "epoch": 0.5873730099042714, "loss_breakdown/lm_loss": 7.972867933858652e-06, "loss_breakdown/pointer_loss": 0.23629236221313477, "step": 5760 }, { "epoch": 0.5873730099042714, "loss_breakdown/lm_loss": 4.676937805925263e-06, "loss_breakdown/pointer_loss": 0.22128179669380188, "step": 5760 }, { "epoch": 0.5873730099042714, "loss_breakdown/lm_loss": 1.1100480151071679e-05, "loss_breakdown/pointer_loss": 0.08166307210922241, "step": 5760 }, { "epoch": 0.5873730099042714, "loss_breakdown/lm_loss": 6.635889803874306e-06, "loss_breakdown/pointer_loss": 0.31073057651519775, "step": 5760 }, { "epoch": 0.5873730099042714, "loss_breakdown/lm_loss": 6.767026661691489e-06, "loss_breakdown/pointer_loss": 0.8227478265762329, "step": 5760 }, { "epoch": 0.5873730099042714, "loss_breakdown/lm_loss": 8.68027018441353e-06, "loss_breakdown/pointer_loss": 0.3216647803783417, "step": 5760 }, { "epoch": 0.588392754713133, "grad_norm": 6.057840254406092, "learning_rate": 2.2872521246458928e-06, "loss": 0.2813, "step": 5770 }, { "epoch": 0.588392754713133, "loss_breakdown/lm_loss": 8.926233931561e-06, "loss_breakdown/pointer_loss": 0.9712279438972473, "step": 5770 }, { "epoch": 0.588392754713133, "loss_breakdown/lm_loss": 7.881516467023175e-06, "loss_breakdown/pointer_loss": 0.5106322765350342, "step": 5770 }, { "epoch": 0.588392754713133, "loss_breakdown/lm_loss": 5.9399571910034865e-06, "loss_breakdown/pointer_loss": 0.6716965436935425, "step": 5770 }, { "epoch": 0.588392754713133, "loss_breakdown/lm_loss": 9.619553566153627e-06, "loss_breakdown/pointer_loss": 0.458548903465271, "step": 5770 }, { "epoch": 0.588392754713133, "loss_breakdown/lm_loss": 5.4565516620641574e-06, "loss_breakdown/pointer_loss": 0.42190876603126526, "step": 5770 }, { "epoch": 0.588392754713133, "loss_breakdown/lm_loss": 5.545026851905277e-06, "loss_breakdown/pointer_loss": 0.116263747215271, "step": 5770 }, { "epoch": 0.588392754713133, "loss_breakdown/lm_loss": 6.694419880659552e-06, "loss_breakdown/pointer_loss": 0.4119200110435486, "step": 5770 }, { "epoch": 0.588392754713133, "loss_breakdown/lm_loss": 6.11199538980145e-06, "loss_breakdown/pointer_loss": 0.5117801427841187, "step": 5770 }, { "epoch": 0.5894124995219946, "grad_norm": 7.074392279163748, "learning_rate": 2.2815864022662893e-06, "loss": 0.2547, "step": 5780 }, { "epoch": 0.5894124995219946, "loss_breakdown/lm_loss": 9.429239980818238e-06, "loss_breakdown/pointer_loss": 0.4324086606502533, "step": 5780 }, { "epoch": 0.5894124995219946, "loss_breakdown/lm_loss": 8.241154318966437e-06, "loss_breakdown/pointer_loss": 0.14565610885620117, "step": 5780 }, { "epoch": 0.5894124995219946, "loss_breakdown/lm_loss": 6.802800271543674e-06, "loss_breakdown/pointer_loss": 0.8770701885223389, "step": 5780 }, { "epoch": 0.5894124995219946, "loss_breakdown/lm_loss": 7.454437309206696e-06, "loss_breakdown/pointer_loss": 0.07304325699806213, "step": 5780 }, { "epoch": 0.5894124995219946, "loss_breakdown/lm_loss": 8.038574378588237e-06, "loss_breakdown/pointer_loss": 0.7117695212364197, "step": 5780 }, { "epoch": 0.5894124995219946, "loss_breakdown/lm_loss": 5.95642586631584e-06, "loss_breakdown/pointer_loss": 0.11383938789367676, "step": 5780 }, { "epoch": 0.5894124995219946, "loss_breakdown/lm_loss": 6.318025953078177e-06, "loss_breakdown/pointer_loss": 0.05696915090084076, "step": 5780 }, { "epoch": 0.5894124995219946, "loss_breakdown/lm_loss": 6.218682301550871e-06, "loss_breakdown/pointer_loss": 0.23558422923088074, "step": 5780 }, { "epoch": 0.5904322443308562, "grad_norm": 3.8231191667248896, "learning_rate": 2.2759206798866858e-06, "loss": 0.2821, "step": 5790 }, { "epoch": 0.5904322443308562, "loss_breakdown/lm_loss": 5.678845809597988e-06, "loss_breakdown/pointer_loss": 0.35761934518814087, "step": 5790 }, { "epoch": 0.5904322443308562, "loss_breakdown/lm_loss": 9.161062735074665e-06, "loss_breakdown/pointer_loss": 0.10435589402914047, "step": 5790 }, { "epoch": 0.5904322443308562, "loss_breakdown/lm_loss": 5.473643795994576e-06, "loss_breakdown/pointer_loss": 0.11849083751440048, "step": 5790 }, { "epoch": 0.5904322443308562, "loss_breakdown/lm_loss": 6.439844128180994e-06, "loss_breakdown/pointer_loss": 0.2593528926372528, "step": 5790 }, { "epoch": 0.5904322443308562, "loss_breakdown/lm_loss": 7.553198884124868e-06, "loss_breakdown/pointer_loss": 0.1910708248615265, "step": 5790 }, { "epoch": 0.5904322443308562, "loss_breakdown/lm_loss": 6.257092536543496e-06, "loss_breakdown/pointer_loss": 0.19281749427318573, "step": 5790 }, { "epoch": 0.5904322443308562, "loss_breakdown/lm_loss": 7.750904842396267e-06, "loss_breakdown/pointer_loss": 0.10919570922851562, "step": 5790 }, { "epoch": 0.5904322443308562, "loss_breakdown/lm_loss": 8.481770237267483e-06, "loss_breakdown/pointer_loss": 0.5280694365501404, "step": 5790 }, { "epoch": 0.5914519891397177, "grad_norm": 14.240635394390344, "learning_rate": 2.2702549575070823e-06, "loss": 0.2472, "step": 5800 }, { "epoch": 0.5914519891397177, "loss_breakdown/lm_loss": 6.824368756497279e-05, "loss_breakdown/pointer_loss": 2.159022331237793, "step": 5800 }, { "epoch": 0.5914519891397177, "loss_breakdown/lm_loss": 1.3012559975322802e-05, "loss_breakdown/pointer_loss": 0.48031312227249146, "step": 5800 }, { "epoch": 0.5914519891397177, "loss_breakdown/lm_loss": 1.5266676200553775e-05, "loss_breakdown/pointer_loss": 0.5234141945838928, "step": 5800 }, { "epoch": 0.5914519891397177, "loss_breakdown/lm_loss": 9.88077499641804e-06, "loss_breakdown/pointer_loss": 0.4109764099121094, "step": 5800 }, { "epoch": 0.5914519891397177, "loss_breakdown/lm_loss": 8.870368219504599e-06, "loss_breakdown/pointer_loss": 0.5246168375015259, "step": 5800 }, { "epoch": 0.5914519891397177, "loss_breakdown/lm_loss": 8.4324801719049e-06, "loss_breakdown/pointer_loss": 0.29289546608924866, "step": 5800 }, { "epoch": 0.5914519891397177, "loss_breakdown/lm_loss": 8.170946784957778e-06, "loss_breakdown/pointer_loss": 0.5081073045730591, "step": 5800 }, { "epoch": 0.5914519891397177, "loss_breakdown/lm_loss": 8.366401743842289e-06, "loss_breakdown/pointer_loss": 0.19393417239189148, "step": 5800 }, { "epoch": 0.5924717339485793, "grad_norm": 15.580148111556237, "learning_rate": 2.2645892351274788e-06, "loss": 0.2562, "step": 5810 }, { "epoch": 0.5924717339485793, "loss_breakdown/lm_loss": 6.560399015143048e-06, "loss_breakdown/pointer_loss": 0.1573726087808609, "step": 5810 }, { "epoch": 0.5924717339485793, "loss_breakdown/lm_loss": 5.594038611889118e-06, "loss_breakdown/pointer_loss": 0.17678458988666534, "step": 5810 }, { "epoch": 0.5924717339485793, "loss_breakdown/lm_loss": 5.324634912540205e-06, "loss_breakdown/pointer_loss": 0.33855533599853516, "step": 5810 }, { "epoch": 0.5924717339485793, "loss_breakdown/lm_loss": 6.249129910429474e-06, "loss_breakdown/pointer_loss": 0.33835574984550476, "step": 5810 }, { "epoch": 0.5924717339485793, "loss_breakdown/lm_loss": 9.675638466433156e-06, "loss_breakdown/pointer_loss": 0.28895217180252075, "step": 5810 }, { "epoch": 0.5924717339485793, "loss_breakdown/lm_loss": 7.36303445592057e-06, "loss_breakdown/pointer_loss": 0.12215442955493927, "step": 5810 }, { "epoch": 0.5924717339485793, "loss_breakdown/lm_loss": 6.643852429988328e-06, "loss_breakdown/pointer_loss": 0.05751466751098633, "step": 5810 }, { "epoch": 0.5924717339485793, "loss_breakdown/lm_loss": 4.462364813662134e-06, "loss_breakdown/pointer_loss": 0.04264716058969498, "step": 5810 }, { "epoch": 0.5934914787574409, "grad_norm": 3.3818001671896885, "learning_rate": 2.2589235127478757e-06, "loss": 0.2749, "step": 5820 }, { "epoch": 0.5934914787574409, "loss_breakdown/lm_loss": 5.738830623158719e-06, "loss_breakdown/pointer_loss": 0.13154596090316772, "step": 5820 }, { "epoch": 0.5934914787574409, "loss_breakdown/lm_loss": 6.245161785045639e-06, "loss_breakdown/pointer_loss": 0.47341737151145935, "step": 5820 }, { "epoch": 0.5934914787574409, "loss_breakdown/lm_loss": 5.929001872573281e-06, "loss_breakdown/pointer_loss": 0.2573879063129425, "step": 5820 }, { "epoch": 0.5934914787574409, "loss_breakdown/lm_loss": 7.696289685554802e-06, "loss_breakdown/pointer_loss": 0.21332547068595886, "step": 5820 }, { "epoch": 0.5934914787574409, "loss_breakdown/lm_loss": 7.136947260732995e-06, "loss_breakdown/pointer_loss": 0.41340190172195435, "step": 5820 }, { "epoch": 0.5934914787574409, "loss_breakdown/lm_loss": 7.200099844340002e-06, "loss_breakdown/pointer_loss": 0.5128703713417053, "step": 5820 }, { "epoch": 0.5934914787574409, "loss_breakdown/lm_loss": 8.275483196484856e-06, "loss_breakdown/pointer_loss": 0.4380805790424347, "step": 5820 }, { "epoch": 0.5934914787574409, "loss_breakdown/lm_loss": 7.785706657159608e-06, "loss_breakdown/pointer_loss": 0.42885929346084595, "step": 5820 }, { "epoch": 0.5945112235663025, "grad_norm": 4.395223966215287, "learning_rate": 2.2532577903682722e-06, "loss": 0.2491, "step": 5830 }, { "epoch": 0.5945112235663025, "loss_breakdown/lm_loss": 1.2703081665677018e-05, "loss_breakdown/pointer_loss": 2.4163379669189453, "step": 5830 }, { "epoch": 0.5945112235663025, "loss_breakdown/lm_loss": 7.502113930968335e-06, "loss_breakdown/pointer_loss": 0.48230215907096863, "step": 5830 }, { "epoch": 0.5945112235663025, "loss_breakdown/lm_loss": 9.627909093978815e-06, "loss_breakdown/pointer_loss": 2.162200927734375, "step": 5830 }, { "epoch": 0.5945112235663025, "loss_breakdown/lm_loss": 6.083579592086608e-06, "loss_breakdown/pointer_loss": 2.0260047912597656, "step": 5830 }, { "epoch": 0.5945112235663025, "loss_breakdown/lm_loss": 2.178192517021671e-05, "loss_breakdown/pointer_loss": 0.3997992277145386, "step": 5830 }, { "epoch": 0.5945112235663025, "loss_breakdown/lm_loss": 5.416020940174349e-06, "loss_breakdown/pointer_loss": 0.040905289351940155, "step": 5830 }, { "epoch": 0.5945112235663025, "loss_breakdown/lm_loss": 8.014724699023645e-06, "loss_breakdown/pointer_loss": 0.4100634455680847, "step": 5830 }, { "epoch": 0.5945112235663025, "loss_breakdown/lm_loss": 8.217339200200513e-06, "loss_breakdown/pointer_loss": 0.0577346608042717, "step": 5830 }, { "epoch": 0.5955309683751641, "grad_norm": 2.5367495599959082, "learning_rate": 2.2475920679886687e-06, "loss": 0.2787, "step": 5840 }, { "epoch": 0.5955309683751641, "loss_breakdown/lm_loss": 8.781913493294269e-06, "loss_breakdown/pointer_loss": 0.2260679453611374, "step": 5840 }, { "epoch": 0.5955309683751641, "loss_breakdown/lm_loss": 6.170972937979968e-06, "loss_breakdown/pointer_loss": 0.381041556596756, "step": 5840 }, { "epoch": 0.5955309683751641, "loss_breakdown/lm_loss": 6.855755145807052e-06, "loss_breakdown/pointer_loss": 0.3351750671863556, "step": 5840 }, { "epoch": 0.5955309683751641, "loss_breakdown/lm_loss": 8.309307304443792e-06, "loss_breakdown/pointer_loss": 0.10029764473438263, "step": 5840 }, { "epoch": 0.5955309683751641, "loss_breakdown/lm_loss": 8.602700290794019e-06, "loss_breakdown/pointer_loss": 0.490092933177948, "step": 5840 }, { "epoch": 0.5955309683751641, "loss_breakdown/lm_loss": 6.645268058491638e-06, "loss_breakdown/pointer_loss": 0.6575033068656921, "step": 5840 }, { "epoch": 0.5955309683751641, "loss_breakdown/lm_loss": 7.266369721037336e-06, "loss_breakdown/pointer_loss": 1.412449836730957, "step": 5840 }, { "epoch": 0.5955309683751641, "loss_breakdown/lm_loss": 9.336996299680322e-06, "loss_breakdown/pointer_loss": 0.10343360900878906, "step": 5840 }, { "epoch": 0.5965507131840257, "grad_norm": 15.85895089014848, "learning_rate": 2.2419263456090652e-06, "loss": 0.2817, "step": 5850 }, { "epoch": 0.5965507131840257, "loss_breakdown/lm_loss": 7.620050018886104e-05, "loss_breakdown/pointer_loss": 1.6023190021514893, "step": 5850 }, { "epoch": 0.5965507131840257, "loss_breakdown/lm_loss": 1.265404080186272e-05, "loss_breakdown/pointer_loss": 1.5216553211212158, "step": 5850 }, { "epoch": 0.5965507131840257, "loss_breakdown/lm_loss": 1.767132562235929e-05, "loss_breakdown/pointer_loss": 0.6180158853530884, "step": 5850 }, { "epoch": 0.5965507131840257, "loss_breakdown/lm_loss": 1.3115827641740907e-05, "loss_breakdown/pointer_loss": 0.8010103106498718, "step": 5850 }, { "epoch": 0.5965507131840257, "loss_breakdown/lm_loss": 8.841412636684254e-06, "loss_breakdown/pointer_loss": 0.34311509132385254, "step": 5850 }, { "epoch": 0.5965507131840257, "loss_breakdown/lm_loss": 7.46713567423285e-06, "loss_breakdown/pointer_loss": 0.5300866961479187, "step": 5850 }, { "epoch": 0.5965507131840257, "loss_breakdown/lm_loss": 9.216001672029961e-06, "loss_breakdown/pointer_loss": 1.323718547821045, "step": 5850 }, { "epoch": 0.5965507131840257, "loss_breakdown/lm_loss": 2.7980706363450736e-05, "loss_breakdown/pointer_loss": 0.3060428500175476, "step": 5850 }, { "epoch": 0.5975704579928873, "grad_norm": 3.2646043659549697, "learning_rate": 2.236260623229462e-06, "loss": 0.2745, "step": 5860 }, { "epoch": 0.5975704579928873, "loss_breakdown/lm_loss": 7.911362445156556e-06, "loss_breakdown/pointer_loss": 0.34050023555755615, "step": 5860 }, { "epoch": 0.5975704579928873, "loss_breakdown/lm_loss": 7.529935373895569e-06, "loss_breakdown/pointer_loss": 0.17175054550170898, "step": 5860 }, { "epoch": 0.5975704579928873, "loss_breakdown/lm_loss": 6.381541879818542e-06, "loss_breakdown/pointer_loss": 1.125312328338623, "step": 5860 }, { "epoch": 0.5975704579928873, "loss_breakdown/lm_loss": 9.401086572324857e-06, "loss_breakdown/pointer_loss": 0.2584018111228943, "step": 5860 }, { "epoch": 0.5975704579928873, "loss_breakdown/lm_loss": 9.775935723155271e-06, "loss_breakdown/pointer_loss": 1.348264217376709, "step": 5860 }, { "epoch": 0.5975704579928873, "loss_breakdown/lm_loss": 7.037225714157103e-06, "loss_breakdown/pointer_loss": 0.7312897443771362, "step": 5860 }, { "epoch": 0.5975704579928873, "loss_breakdown/lm_loss": 1.1046387953683734e-05, "loss_breakdown/pointer_loss": 0.245456263422966, "step": 5860 }, { "epoch": 0.5975704579928873, "loss_breakdown/lm_loss": 9.254432370653376e-06, "loss_breakdown/pointer_loss": 0.06572628766298294, "step": 5860 }, { "epoch": 0.5985902028017489, "grad_norm": 2.9780023261211754, "learning_rate": 2.2305949008498582e-06, "loss": 0.2626, "step": 5870 }, { "epoch": 0.5985902028017489, "loss_breakdown/lm_loss": 7.3744377004913986e-06, "loss_breakdown/pointer_loss": 0.3619592785835266, "step": 5870 }, { "epoch": 0.5985902028017489, "loss_breakdown/lm_loss": 7.264040505106095e-06, "loss_breakdown/pointer_loss": 0.5621590614318848, "step": 5870 }, { "epoch": 0.5985902028017489, "loss_breakdown/lm_loss": 7.1524509621667676e-06, "loss_breakdown/pointer_loss": 0.46040159463882446, "step": 5870 }, { "epoch": 0.5985902028017489, "loss_breakdown/lm_loss": 9.303410479333252e-06, "loss_breakdown/pointer_loss": 0.3527650833129883, "step": 5870 }, { "epoch": 0.5985902028017489, "loss_breakdown/lm_loss": 1.2212177352921572e-05, "loss_breakdown/pointer_loss": 1.0121192932128906, "step": 5870 }, { "epoch": 0.5985902028017489, "loss_breakdown/lm_loss": 8.570786121708807e-06, "loss_breakdown/pointer_loss": 0.7796074151992798, "step": 5870 }, { "epoch": 0.5985902028017489, "loss_breakdown/lm_loss": 7.794621524226386e-06, "loss_breakdown/pointer_loss": 0.6718522310256958, "step": 5870 }, { "epoch": 0.5985902028017489, "loss_breakdown/lm_loss": 5.199456154514337e-06, "loss_breakdown/pointer_loss": 0.2299792468547821, "step": 5870 }, { "epoch": 0.5996099476106105, "grad_norm": 5.7738342293486955, "learning_rate": 2.224929178470255e-06, "loss": 0.2523, "step": 5880 }, { "epoch": 0.5996099476106105, "loss_breakdown/lm_loss": 6.4451760408701375e-06, "loss_breakdown/pointer_loss": 7.002100944519043, "step": 5880 }, { "epoch": 0.5996099476106105, "loss_breakdown/lm_loss": 2.2565858671441674e-05, "loss_breakdown/pointer_loss": 0.5241253972053528, "step": 5880 }, { "epoch": 0.5996099476106105, "loss_breakdown/lm_loss": 5.694168521586107e-06, "loss_breakdown/pointer_loss": 0.27631354331970215, "step": 5880 }, { "epoch": 0.5996099476106105, "loss_breakdown/lm_loss": 8.72991768119391e-06, "loss_breakdown/pointer_loss": 0.975984513759613, "step": 5880 }, { "epoch": 0.5996099476106105, "loss_breakdown/lm_loss": 1.2449009773263242e-05, "loss_breakdown/pointer_loss": 0.09022893011569977, "step": 5880 }, { "epoch": 0.5996099476106105, "loss_breakdown/lm_loss": 6.544493317051092e-06, "loss_breakdown/pointer_loss": 2.549952983856201, "step": 5880 }, { "epoch": 0.5996099476106105, "loss_breakdown/lm_loss": 7.283591003215406e-06, "loss_breakdown/pointer_loss": 0.12835407257080078, "step": 5880 }, { "epoch": 0.5996099476106105, "loss_breakdown/lm_loss": 1.1590746908041183e-05, "loss_breakdown/pointer_loss": 0.10979938507080078, "step": 5880 }, { "epoch": 0.6006296924194721, "grad_norm": 4.493419381346244, "learning_rate": 2.2192634560906517e-06, "loss": 0.3121, "step": 5890 }, { "epoch": 0.6006296924194721, "loss_breakdown/lm_loss": 6.401432074198965e-06, "loss_breakdown/pointer_loss": 0.2775323987007141, "step": 5890 }, { "epoch": 0.6006296924194721, "loss_breakdown/lm_loss": 6.496559308288852e-06, "loss_breakdown/pointer_loss": 0.6562715768814087, "step": 5890 }, { "epoch": 0.6006296924194721, "loss_breakdown/lm_loss": 7.574752544314833e-06, "loss_breakdown/pointer_loss": 0.9047781229019165, "step": 5890 }, { "epoch": 0.6006296924194721, "loss_breakdown/lm_loss": 6.242496965569444e-06, "loss_breakdown/pointer_loss": 0.17883622646331787, "step": 5890 }, { "epoch": 0.6006296924194721, "loss_breakdown/lm_loss": 6.531881354021607e-06, "loss_breakdown/pointer_loss": 0.4818708598613739, "step": 5890 }, { "epoch": 0.6006296924194721, "loss_breakdown/lm_loss": 6.592680620087776e-06, "loss_breakdown/pointer_loss": 0.11389105021953583, "step": 5890 }, { "epoch": 0.6006296924194721, "loss_breakdown/lm_loss": 7.290705070772674e-06, "loss_breakdown/pointer_loss": 0.1555444598197937, "step": 5890 }, { "epoch": 0.6006296924194721, "loss_breakdown/lm_loss": 1.2365062502794899e-05, "loss_breakdown/pointer_loss": 0.44879886507987976, "step": 5890 }, { "epoch": 0.6016494372283336, "grad_norm": 66.12121391621108, "learning_rate": 2.2135977337110486e-06, "loss": 0.2628, "step": 5900 }, { "epoch": 0.6016494372283336, "loss_breakdown/lm_loss": 1.908034937514458e-05, "loss_breakdown/pointer_loss": 1.5251390933990479, "step": 5900 }, { "epoch": 0.6016494372283336, "loss_breakdown/lm_loss": 1.813425842556171e-05, "loss_breakdown/pointer_loss": 0.5652559995651245, "step": 5900 }, { "epoch": 0.6016494372283336, "loss_breakdown/lm_loss": 1.1345928214723244e-05, "loss_breakdown/pointer_loss": 0.5439811944961548, "step": 5900 }, { "epoch": 0.6016494372283336, "loss_breakdown/lm_loss": 7.698429726588074e-06, "loss_breakdown/pointer_loss": 0.4249817728996277, "step": 5900 }, { "epoch": 0.6016494372283336, "loss_breakdown/lm_loss": 7.782402462908067e-06, "loss_breakdown/pointer_loss": 0.6848012208938599, "step": 5900 }, { "epoch": 0.6016494372283336, "loss_breakdown/lm_loss": 1.1158019333379343e-05, "loss_breakdown/pointer_loss": 0.5055491924285889, "step": 5900 }, { "epoch": 0.6016494372283336, "loss_breakdown/lm_loss": 1.1541207641130313e-05, "loss_breakdown/pointer_loss": 0.24790555238723755, "step": 5900 }, { "epoch": 0.6016494372283336, "loss_breakdown/lm_loss": 1.0543740245338995e-05, "loss_breakdown/pointer_loss": 0.5880364179611206, "step": 5900 }, { "epoch": 0.6026691820371952, "grad_norm": 4.278554772926283, "learning_rate": 2.2079320113314447e-06, "loss": 0.2486, "step": 5910 }, { "epoch": 0.6026691820371952, "loss_breakdown/lm_loss": 6.083588687033625e-06, "loss_breakdown/pointer_loss": 0.14010877907276154, "step": 5910 }, { "epoch": 0.6026691820371952, "loss_breakdown/lm_loss": 6.214707809704123e-06, "loss_breakdown/pointer_loss": 0.0851205438375473, "step": 5910 }, { "epoch": 0.6026691820371952, "loss_breakdown/lm_loss": 7.890211236372124e-06, "loss_breakdown/pointer_loss": 0.15228398144245148, "step": 5910 }, { "epoch": 0.6026691820371952, "loss_breakdown/lm_loss": 7.2696830102358945e-06, "loss_breakdown/pointer_loss": 1.120509147644043, "step": 5910 }, { "epoch": 0.6026691820371952, "loss_breakdown/lm_loss": 7.239860224217409e-06, "loss_breakdown/pointer_loss": 3.0227277278900146, "step": 5910 }, { "epoch": 0.6026691820371952, "loss_breakdown/lm_loss": 8.765608072280884e-06, "loss_breakdown/pointer_loss": 0.29503872990608215, "step": 5910 }, { "epoch": 0.6026691820371952, "loss_breakdown/lm_loss": 7.881590136094019e-06, "loss_breakdown/pointer_loss": 0.07497428357601166, "step": 5910 }, { "epoch": 0.6026691820371952, "loss_breakdown/lm_loss": 1.0756435585790314e-05, "loss_breakdown/pointer_loss": 0.2214234173297882, "step": 5910 }, { "epoch": 0.6036889268460568, "grad_norm": 2.299650082649306, "learning_rate": 2.2022662889518416e-06, "loss": 0.2883, "step": 5920 }, { "epoch": 0.6036889268460568, "loss_breakdown/lm_loss": 7.856649972382002e-06, "loss_breakdown/pointer_loss": 0.4759659767150879, "step": 5920 }, { "epoch": 0.6036889268460568, "loss_breakdown/lm_loss": 3.112648118985817e-05, "loss_breakdown/pointer_loss": 0.4033486247062683, "step": 5920 }, { "epoch": 0.6036889268460568, "loss_breakdown/lm_loss": 8.83645498106489e-06, "loss_breakdown/pointer_loss": 0.4115564525127411, "step": 5920 }, { "epoch": 0.6036889268460568, "loss_breakdown/lm_loss": 1.2603948562173173e-05, "loss_breakdown/pointer_loss": 1.7189207077026367, "step": 5920 }, { "epoch": 0.6036889268460568, "loss_breakdown/lm_loss": 7.844884748919867e-06, "loss_breakdown/pointer_loss": 0.38591986894607544, "step": 5920 }, { "epoch": 0.6036889268460568, "loss_breakdown/lm_loss": 6.2620852077088784e-06, "loss_breakdown/pointer_loss": 0.33030956983566284, "step": 5920 }, { "epoch": 0.6036889268460568, "loss_breakdown/lm_loss": 1.1688919585139956e-05, "loss_breakdown/pointer_loss": 0.6166483163833618, "step": 5920 }, { "epoch": 0.6036889268460568, "loss_breakdown/lm_loss": 8.04596857051365e-06, "loss_breakdown/pointer_loss": 0.47729453444480896, "step": 5920 }, { "epoch": 0.6047086716549184, "grad_norm": 8.802062721846227, "learning_rate": 2.196600566572238e-06, "loss": 0.2547, "step": 5930 }, { "epoch": 0.6047086716549184, "loss_breakdown/lm_loss": 6.4292908064089715e-06, "loss_breakdown/pointer_loss": 0.06072945147752762, "step": 5930 }, { "epoch": 0.6047086716549184, "loss_breakdown/lm_loss": 1.3434209904517047e-05, "loss_breakdown/pointer_loss": 3.0270190238952637, "step": 5930 }, { "epoch": 0.6047086716549184, "loss_breakdown/lm_loss": 5.527283065021038e-06, "loss_breakdown/pointer_loss": 0.14696842432022095, "step": 5930 }, { "epoch": 0.6047086716549184, "loss_breakdown/lm_loss": 1.1944008292630315e-05, "loss_breakdown/pointer_loss": 0.11231103539466858, "step": 5930 }, { "epoch": 0.6047086716549184, "loss_breakdown/lm_loss": 8.013394108274952e-06, "loss_breakdown/pointer_loss": 0.6730061769485474, "step": 5930 }, { "epoch": 0.6047086716549184, "loss_breakdown/lm_loss": 1.3036609743721783e-05, "loss_breakdown/pointer_loss": 0.08273464441299438, "step": 5930 }, { "epoch": 0.6047086716549184, "loss_breakdown/lm_loss": 8.090180926956236e-06, "loss_breakdown/pointer_loss": 0.058540090918540955, "step": 5930 }, { "epoch": 0.6047086716549184, "loss_breakdown/lm_loss": 6.127288543211762e-06, "loss_breakdown/pointer_loss": 0.06809355318546295, "step": 5930 }, { "epoch": 0.60572841646378, "grad_norm": 3.101372606158599, "learning_rate": 2.1909348441926346e-06, "loss": 0.2754, "step": 5940 }, { "epoch": 0.60572841646378, "loss_breakdown/lm_loss": 6.983555977058131e-06, "loss_breakdown/pointer_loss": 0.4654863476753235, "step": 5940 }, { "epoch": 0.60572841646378, "loss_breakdown/lm_loss": 7.060455573082436e-06, "loss_breakdown/pointer_loss": 0.14961619675159454, "step": 5940 }, { "epoch": 0.60572841646378, "loss_breakdown/lm_loss": 7.5100660978932865e-06, "loss_breakdown/pointer_loss": 0.6192752122879028, "step": 5940 }, { "epoch": 0.60572841646378, "loss_breakdown/lm_loss": 6.418441444111522e-06, "loss_breakdown/pointer_loss": 0.27531206607818604, "step": 5940 }, { "epoch": 0.60572841646378, "loss_breakdown/lm_loss": 6.165041668282356e-06, "loss_breakdown/pointer_loss": 0.16435769200325012, "step": 5940 }, { "epoch": 0.60572841646378, "loss_breakdown/lm_loss": 1.0636935257934965e-05, "loss_breakdown/pointer_loss": 0.2833031117916107, "step": 5940 }, { "epoch": 0.60572841646378, "loss_breakdown/lm_loss": 8.973604053608142e-06, "loss_breakdown/pointer_loss": 0.6527260541915894, "step": 5940 }, { "epoch": 0.60572841646378, "loss_breakdown/lm_loss": 5.7190918596461415e-06, "loss_breakdown/pointer_loss": 0.12904343008995056, "step": 5940 }, { "epoch": 0.6067481612726415, "grad_norm": 7.8732952008594035, "learning_rate": 2.1852691218130315e-06, "loss": 0.2575, "step": 5950 }, { "epoch": 0.6067481612726415, "loss_breakdown/lm_loss": 4.406399966683239e-05, "loss_breakdown/pointer_loss": 2.6553213596343994, "step": 5950 }, { "epoch": 0.6067481612726415, "loss_breakdown/lm_loss": 1.0715456483012531e-05, "loss_breakdown/pointer_loss": 0.36193567514419556, "step": 5950 }, { "epoch": 0.6067481612726415, "loss_breakdown/lm_loss": 1.1786602044594474e-05, "loss_breakdown/pointer_loss": 0.22275181114673615, "step": 5950 }, { "epoch": 0.6067481612726415, "loss_breakdown/lm_loss": 1.2553192391351331e-05, "loss_breakdown/pointer_loss": 0.7121007442474365, "step": 5950 }, { "epoch": 0.6067481612726415, "loss_breakdown/lm_loss": 1.0819322596944403e-05, "loss_breakdown/pointer_loss": 1.2201141119003296, "step": 5950 }, { "epoch": 0.6067481612726415, "loss_breakdown/lm_loss": 1.1931901099160314e-05, "loss_breakdown/pointer_loss": 0.9589655995368958, "step": 5950 }, { "epoch": 0.6067481612726415, "loss_breakdown/lm_loss": 7.1551830842508934e-06, "loss_breakdown/pointer_loss": 0.43028342723846436, "step": 5950 }, { "epoch": 0.6067481612726415, "loss_breakdown/lm_loss": 9.84366033662809e-06, "loss_breakdown/pointer_loss": 0.6268638968467712, "step": 5950 }, { "epoch": 0.6077679060815031, "grad_norm": 4.2832991579107205, "learning_rate": 2.179603399433428e-06, "loss": 0.2572, "step": 5960 }, { "epoch": 0.6077679060815031, "loss_breakdown/lm_loss": 5.23522840012447e-06, "loss_breakdown/pointer_loss": 0.20947067439556122, "step": 5960 }, { "epoch": 0.6077679060815031, "loss_breakdown/lm_loss": 8.716674528841395e-06, "loss_breakdown/pointer_loss": 0.35777127742767334, "step": 5960 }, { "epoch": 0.6077679060815031, "loss_breakdown/lm_loss": 6.053780907677719e-06, "loss_breakdown/pointer_loss": 0.19173870980739594, "step": 5960 }, { "epoch": 0.6077679060815031, "loss_breakdown/lm_loss": 4.62726848127204e-06, "loss_breakdown/pointer_loss": 0.4207184910774231, "step": 5960 }, { "epoch": 0.6077679060815031, "loss_breakdown/lm_loss": 5.874549515283434e-06, "loss_breakdown/pointer_loss": 0.4770852327346802, "step": 5960 }, { "epoch": 0.6077679060815031, "loss_breakdown/lm_loss": 4.491507297643693e-06, "loss_breakdown/pointer_loss": 0.25613564252853394, "step": 5960 }, { "epoch": 0.6077679060815031, "loss_breakdown/lm_loss": 6.033905265212525e-06, "loss_breakdown/pointer_loss": 0.40356793999671936, "step": 5960 }, { "epoch": 0.6077679060815031, "loss_breakdown/lm_loss": 9.353779205412138e-06, "loss_breakdown/pointer_loss": 2.2133233547210693, "step": 5960 }, { "epoch": 0.6087876508903647, "grad_norm": 1.8184372309908359, "learning_rate": 2.1739376770538245e-06, "loss": 0.2744, "step": 5970 }, { "epoch": 0.6087876508903647, "loss_breakdown/lm_loss": 6.057350674382178e-06, "loss_breakdown/pointer_loss": 0.5061475038528442, "step": 5970 }, { "epoch": 0.6087876508903647, "loss_breakdown/lm_loss": 5.4166184781934135e-06, "loss_breakdown/pointer_loss": 0.26243293285369873, "step": 5970 }, { "epoch": 0.6087876508903647, "loss_breakdown/lm_loss": 6.685402240691474e-06, "loss_breakdown/pointer_loss": 0.5437787175178528, "step": 5970 }, { "epoch": 0.6087876508903647, "loss_breakdown/lm_loss": 5.051667812949745e-06, "loss_breakdown/pointer_loss": 0.4595937132835388, "step": 5970 }, { "epoch": 0.6087876508903647, "loss_breakdown/lm_loss": 6.235423825273756e-06, "loss_breakdown/pointer_loss": 0.8507740497589111, "step": 5970 }, { "epoch": 0.6087876508903647, "loss_breakdown/lm_loss": 6.77494290357572e-06, "loss_breakdown/pointer_loss": 0.2128772735595703, "step": 5970 }, { "epoch": 0.6087876508903647, "loss_breakdown/lm_loss": 5.4041015573602635e-06, "loss_breakdown/pointer_loss": 1.0922006368637085, "step": 5970 }, { "epoch": 0.6087876508903647, "loss_breakdown/lm_loss": 5.3888816182734445e-06, "loss_breakdown/pointer_loss": 0.35275137424468994, "step": 5970 }, { "epoch": 0.6098073956992263, "grad_norm": 6.067849003321441, "learning_rate": 2.168271954674221e-06, "loss": 0.2492, "step": 5980 }, { "epoch": 0.6098073956992263, "loss_breakdown/lm_loss": 6.095506250858307e-06, "loss_breakdown/pointer_loss": 0.10973991453647614, "step": 5980 }, { "epoch": 0.6098073956992263, "loss_breakdown/lm_loss": 6.099466190789826e-06, "loss_breakdown/pointer_loss": 0.3289572596549988, "step": 5980 }, { "epoch": 0.6098073956992263, "loss_breakdown/lm_loss": 6.425315859814873e-06, "loss_breakdown/pointer_loss": 0.6966584324836731, "step": 5980 }, { "epoch": 0.6098073956992263, "loss_breakdown/lm_loss": 8.841140697768424e-06, "loss_breakdown/pointer_loss": 0.3156232237815857, "step": 5980 }, { "epoch": 0.6098073956992263, "loss_breakdown/lm_loss": 5.845170562679414e-06, "loss_breakdown/pointer_loss": 0.7182555794715881, "step": 5980 }, { "epoch": 0.6098073956992263, "loss_breakdown/lm_loss": 1.0255681445414666e-05, "loss_breakdown/pointer_loss": 0.26644179224967957, "step": 5980 }, { "epoch": 0.6098073956992263, "loss_breakdown/lm_loss": 8.495500878780149e-06, "loss_breakdown/pointer_loss": 0.8245599865913391, "step": 5980 }, { "epoch": 0.6098073956992263, "loss_breakdown/lm_loss": 1.1165562682435848e-05, "loss_breakdown/pointer_loss": 0.7171820402145386, "step": 5980 }, { "epoch": 0.6108271405080878, "grad_norm": 2.7105613868891116, "learning_rate": 2.162606232294618e-06, "loss": 0.2867, "step": 5990 }, { "epoch": 0.6108271405080878, "loss_breakdown/lm_loss": 8.717976015759632e-06, "loss_breakdown/pointer_loss": 0.23251676559448242, "step": 5990 }, { "epoch": 0.6108271405080878, "loss_breakdown/lm_loss": 6.649802344327327e-06, "loss_breakdown/pointer_loss": 0.22443294525146484, "step": 5990 }, { "epoch": 0.6108271405080878, "loss_breakdown/lm_loss": 6.234572083485546e-06, "loss_breakdown/pointer_loss": 0.4079478681087494, "step": 5990 }, { "epoch": 0.6108271405080878, "loss_breakdown/lm_loss": 7.972440471348818e-06, "loss_breakdown/pointer_loss": 0.11707407236099243, "step": 5990 }, { "epoch": 0.6108271405080878, "loss_breakdown/lm_loss": 4.929039732814999e-06, "loss_breakdown/pointer_loss": 0.48913922905921936, "step": 5990 }, { "epoch": 0.6108271405080878, "loss_breakdown/lm_loss": 6.493409728136612e-06, "loss_breakdown/pointer_loss": 0.15594398975372314, "step": 5990 }, { "epoch": 0.6108271405080878, "loss_breakdown/lm_loss": 6.5016670305340085e-06, "loss_breakdown/pointer_loss": 0.1627044975757599, "step": 5990 }, { "epoch": 0.6108271405080878, "loss_breakdown/lm_loss": 7.305078906938434e-05, "loss_breakdown/pointer_loss": 0.6380520462989807, "step": 5990 }, { "epoch": 0.6118468853169494, "grad_norm": 18.561085758330496, "learning_rate": 2.156940509915014e-06, "loss": 0.2624, "step": 6000 }, { "epoch": 0.6118468853169494, "loss_breakdown/lm_loss": 2.3835049432818778e-05, "loss_breakdown/pointer_loss": 1.4341344833374023, "step": 6000 }, { "epoch": 0.6118468853169494, "loss_breakdown/lm_loss": 8.897923180484213e-06, "loss_breakdown/pointer_loss": 0.6768954992294312, "step": 6000 }, { "epoch": 0.6118468853169494, "loss_breakdown/lm_loss": 1.0014035069616511e-05, "loss_breakdown/pointer_loss": 0.5289042592048645, "step": 6000 }, { "epoch": 0.6118468853169494, "loss_breakdown/lm_loss": 1.3820849744661245e-05, "loss_breakdown/pointer_loss": 0.3104174733161926, "step": 6000 }, { "epoch": 0.6118468853169494, "loss_breakdown/lm_loss": 8.36646358948201e-06, "loss_breakdown/pointer_loss": 0.6706333160400391, "step": 6000 }, { "epoch": 0.6118468853169494, "loss_breakdown/lm_loss": 6.7732653405983e-06, "loss_breakdown/pointer_loss": 0.328871488571167, "step": 6000 }, { "epoch": 0.6118468853169494, "loss_breakdown/lm_loss": 8.535296728950925e-06, "loss_breakdown/pointer_loss": 0.4168136715888977, "step": 6000 }, { "epoch": 0.6118468853169494, "loss_breakdown/lm_loss": 6.3005759329826105e-06, "loss_breakdown/pointer_loss": 0.8765156269073486, "step": 6000 }, { "epoch": 0.612866630125811, "grad_norm": 15.064836132863608, "learning_rate": 2.151274787535411e-06, "loss": 0.2665, "step": 6010 }, { "epoch": 0.612866630125811, "loss_breakdown/lm_loss": 5.5948312365217134e-06, "loss_breakdown/pointer_loss": 0.2701033353805542, "step": 6010 }, { "epoch": 0.612866630125811, "loss_breakdown/lm_loss": 6.216674137249356e-06, "loss_breakdown/pointer_loss": 0.13040117919445038, "step": 6010 }, { "epoch": 0.612866630125811, "loss_breakdown/lm_loss": 4.965025254932698e-06, "loss_breakdown/pointer_loss": 0.3532789349555969, "step": 6010 }, { "epoch": 0.612866630125811, "loss_breakdown/lm_loss": 6.347813268803293e-06, "loss_breakdown/pointer_loss": 0.3804064095020294, "step": 6010 }, { "epoch": 0.612866630125811, "loss_breakdown/lm_loss": 5.636559762933757e-06, "loss_breakdown/pointer_loss": 0.11165406554937363, "step": 6010 }, { "epoch": 0.612866630125811, "loss_breakdown/lm_loss": 4.980255653208587e-06, "loss_breakdown/pointer_loss": 0.09698089957237244, "step": 6010 }, { "epoch": 0.612866630125811, "loss_breakdown/lm_loss": 8.690220056450926e-06, "loss_breakdown/pointer_loss": 0.08813242614269257, "step": 6010 }, { "epoch": 0.612866630125811, "loss_breakdown/lm_loss": 5.574959232035326e-06, "loss_breakdown/pointer_loss": 0.2102692574262619, "step": 6010 }, { "epoch": 0.6138863749346726, "grad_norm": 2.400084224629518, "learning_rate": 2.1456090651558075e-06, "loss": 0.3015, "step": 6020 }, { "epoch": 0.6138863749346726, "loss_breakdown/lm_loss": 5.0862158786912914e-06, "loss_breakdown/pointer_loss": 0.4113829731941223, "step": 6020 }, { "epoch": 0.6138863749346726, "loss_breakdown/lm_loss": 5.4415713748312555e-06, "loss_breakdown/pointer_loss": 0.5352901220321655, "step": 6020 }, { "epoch": 0.6138863749346726, "loss_breakdown/lm_loss": 5.629549832519842e-06, "loss_breakdown/pointer_loss": 1.2147538661956787, "step": 6020 }, { "epoch": 0.6138863749346726, "loss_breakdown/lm_loss": 8.611212251707911e-06, "loss_breakdown/pointer_loss": 0.791611909866333, "step": 6020 }, { "epoch": 0.6138863749346726, "loss_breakdown/lm_loss": 4.838972927245777e-06, "loss_breakdown/pointer_loss": 0.3152509927749634, "step": 6020 }, { "epoch": 0.6138863749346726, "loss_breakdown/lm_loss": 5.082225015939912e-06, "loss_breakdown/pointer_loss": 0.29377269744873047, "step": 6020 }, { "epoch": 0.6138863749346726, "loss_breakdown/lm_loss": 4.3844238462042995e-06, "loss_breakdown/pointer_loss": 0.40828028321266174, "step": 6020 }, { "epoch": 0.6138863749346726, "loss_breakdown/lm_loss": 9.866746040643193e-06, "loss_breakdown/pointer_loss": 0.7647548317909241, "step": 6020 }, { "epoch": 0.6149061197435342, "grad_norm": 40.322207012161684, "learning_rate": 2.1399433427762044e-06, "loss": 0.2499, "step": 6030 }, { "epoch": 0.6149061197435342, "loss_breakdown/lm_loss": 6.490198302344652e-06, "loss_breakdown/pointer_loss": 0.1200382262468338, "step": 6030 }, { "epoch": 0.6149061197435342, "loss_breakdown/lm_loss": 5.8968239500245545e-06, "loss_breakdown/pointer_loss": 0.12527185678482056, "step": 6030 }, { "epoch": 0.6149061197435342, "loss_breakdown/lm_loss": 4.875618287769612e-06, "loss_breakdown/pointer_loss": 0.13118711113929749, "step": 6030 }, { "epoch": 0.6149061197435342, "loss_breakdown/lm_loss": 5.896828952245414e-06, "loss_breakdown/pointer_loss": 0.049605756998062134, "step": 6030 }, { "epoch": 0.6149061197435342, "loss_breakdown/lm_loss": 8.861052265274338e-06, "loss_breakdown/pointer_loss": 1.246691107749939, "step": 6030 }, { "epoch": 0.6149061197435342, "loss_breakdown/lm_loss": 4.792175786860753e-06, "loss_breakdown/pointer_loss": 2.2837588787078857, "step": 6030 }, { "epoch": 0.6149061197435342, "loss_breakdown/lm_loss": 5.626623078569537e-06, "loss_breakdown/pointer_loss": 0.039659857749938965, "step": 6030 }, { "epoch": 0.6149061197435342, "loss_breakdown/lm_loss": 1.0259649570798501e-05, "loss_breakdown/pointer_loss": 0.27881160378456116, "step": 6030 }, { "epoch": 0.6159258645523957, "grad_norm": 3.6512347876114006, "learning_rate": 2.1342776203966005e-06, "loss": 0.2995, "step": 6040 }, { "epoch": 0.6159258645523957, "loss_breakdown/lm_loss": 5.2462078201642726e-06, "loss_breakdown/pointer_loss": 0.49482986330986023, "step": 6040 }, { "epoch": 0.6159258645523957, "loss_breakdown/lm_loss": 4.9060836317949e-06, "loss_breakdown/pointer_loss": 0.21286538243293762, "step": 6040 }, { "epoch": 0.6159258645523957, "loss_breakdown/lm_loss": 4.652005827665562e-06, "loss_breakdown/pointer_loss": 0.34071487188339233, "step": 6040 }, { "epoch": 0.6159258645523957, "loss_breakdown/lm_loss": 4.937871835863916e-06, "loss_breakdown/pointer_loss": 0.27821677923202515, "step": 6040 }, { "epoch": 0.6159258645523957, "loss_breakdown/lm_loss": 5.3063517952978145e-06, "loss_breakdown/pointer_loss": 0.5633882284164429, "step": 6040 }, { "epoch": 0.6159258645523957, "loss_breakdown/lm_loss": 5.521320872503566e-06, "loss_breakdown/pointer_loss": 0.359502375125885, "step": 6040 }, { "epoch": 0.6159258645523957, "loss_breakdown/lm_loss": 4.939194241160294e-06, "loss_breakdown/pointer_loss": 0.2050301432609558, "step": 6040 }, { "epoch": 0.6159258645523957, "loss_breakdown/lm_loss": 5.704321665689349e-06, "loss_breakdown/pointer_loss": 0.16926315426826477, "step": 6040 }, { "epoch": 0.6169456093612573, "grad_norm": 19.59097447946724, "learning_rate": 2.1286118980169974e-06, "loss": 0.2696, "step": 6050 }, { "epoch": 0.6169456093612573, "loss_breakdown/lm_loss": 2.1173706045374274e-05, "loss_breakdown/pointer_loss": 2.5209903717041016, "step": 6050 }, { "epoch": 0.6169456093612573, "loss_breakdown/lm_loss": 9.091176252695732e-06, "loss_breakdown/pointer_loss": 0.5344359874725342, "step": 6050 }, { "epoch": 0.6169456093612573, "loss_breakdown/lm_loss": 1.17905656225048e-05, "loss_breakdown/pointer_loss": 1.9314666986465454, "step": 6050 }, { "epoch": 0.6169456093612573, "loss_breakdown/lm_loss": 8.025434908631723e-06, "loss_breakdown/pointer_loss": 0.3599851727485657, "step": 6050 }, { "epoch": 0.6169456093612573, "loss_breakdown/lm_loss": 1.891378815344069e-05, "loss_breakdown/pointer_loss": 0.6480413675308228, "step": 6050 }, { "epoch": 0.6169456093612573, "loss_breakdown/lm_loss": 6.8458198256848846e-06, "loss_breakdown/pointer_loss": 0.4187987148761749, "step": 6050 }, { "epoch": 0.6169456093612573, "loss_breakdown/lm_loss": 9.225216672348324e-06, "loss_breakdown/pointer_loss": 0.5778769254684448, "step": 6050 }, { "epoch": 0.6169456093612573, "loss_breakdown/lm_loss": 4.419650849740719e-06, "loss_breakdown/pointer_loss": 0.4660835266113281, "step": 6050 }, { "epoch": 0.6179653541701189, "grad_norm": 3.797356561917525, "learning_rate": 2.122946175637394e-06, "loss": 0.268, "step": 6060 }, { "epoch": 0.6179653541701189, "loss_breakdown/lm_loss": 5.287539352138992e-06, "loss_breakdown/pointer_loss": 0.12065955996513367, "step": 6060 }, { "epoch": 0.6179653541701189, "loss_breakdown/lm_loss": 4.871647433901671e-06, "loss_breakdown/pointer_loss": 0.17420518398284912, "step": 6060 }, { "epoch": 0.6179653541701189, "loss_breakdown/lm_loss": 6.376830242516007e-06, "loss_breakdown/pointer_loss": 0.044234175235033035, "step": 6060 }, { "epoch": 0.6179653541701189, "loss_breakdown/lm_loss": 8.896144208847545e-06, "loss_breakdown/pointer_loss": 0.164165660738945, "step": 6060 }, { "epoch": 0.6179653541701189, "loss_breakdown/lm_loss": 5.086217242933344e-06, "loss_breakdown/pointer_loss": 0.22885900735855103, "step": 6060 }, { "epoch": 0.6179653541701189, "loss_breakdown/lm_loss": 8.217385584430303e-06, "loss_breakdown/pointer_loss": 0.10696613043546677, "step": 6060 }, { "epoch": 0.6179653541701189, "loss_breakdown/lm_loss": 8.151130714395549e-06, "loss_breakdown/pointer_loss": 0.6680361032485962, "step": 6060 }, { "epoch": 0.6179653541701189, "loss_breakdown/lm_loss": 5.8491364143264946e-06, "loss_breakdown/pointer_loss": 0.39489656686782837, "step": 6060 }, { "epoch": 0.6189850989789805, "grad_norm": 2.858793679116248, "learning_rate": 2.117280453257791e-06, "loss": 0.2863, "step": 6070 }, { "epoch": 0.6189850989789805, "loss_breakdown/lm_loss": 9.059679541678634e-06, "loss_breakdown/pointer_loss": 0.2512122690677643, "step": 6070 }, { "epoch": 0.6189850989789805, "loss_breakdown/lm_loss": 7.147044925659429e-06, "loss_breakdown/pointer_loss": 0.25363051891326904, "step": 6070 }, { "epoch": 0.6189850989789805, "loss_breakdown/lm_loss": 9.471125849813689e-06, "loss_breakdown/pointer_loss": 1.0147488117218018, "step": 6070 }, { "epoch": 0.6189850989789805, "loss_breakdown/lm_loss": 5.764102752436884e-06, "loss_breakdown/pointer_loss": 0.9272308349609375, "step": 6070 }, { "epoch": 0.6189850989789805, "loss_breakdown/lm_loss": 5.717016847484047e-06, "loss_breakdown/pointer_loss": 0.3781198263168335, "step": 6070 }, { "epoch": 0.6189850989789805, "loss_breakdown/lm_loss": 5.911030257266248e-06, "loss_breakdown/pointer_loss": 0.25002020597457886, "step": 6070 }, { "epoch": 0.6189850989789805, "loss_breakdown/lm_loss": 1.2744694686261937e-05, "loss_breakdown/pointer_loss": 0.741631031036377, "step": 6070 }, { "epoch": 0.6189850989789805, "loss_breakdown/lm_loss": 7.426634056173498e-06, "loss_breakdown/pointer_loss": 0.03910992667078972, "step": 6070 }, { "epoch": 0.620004843787842, "grad_norm": 8.194908908879453, "learning_rate": 2.111614730878187e-06, "loss": 0.2345, "step": 6080 }, { "epoch": 0.620004843787842, "loss_breakdown/lm_loss": 1.2293958207010292e-05, "loss_breakdown/pointer_loss": 0.33501553535461426, "step": 6080 }, { "epoch": 0.620004843787842, "loss_breakdown/lm_loss": 7.645177902304567e-06, "loss_breakdown/pointer_loss": 0.8263870477676392, "step": 6080 }, { "epoch": 0.620004843787842, "loss_breakdown/lm_loss": 1.7231994206667878e-05, "loss_breakdown/pointer_loss": 1.0699563026428223, "step": 6080 }, { "epoch": 0.620004843787842, "loss_breakdown/lm_loss": 8.443846127192955e-06, "loss_breakdown/pointer_loss": 2.583432197570801, "step": 6080 }, { "epoch": 0.620004843787842, "loss_breakdown/lm_loss": 5.404108378570527e-06, "loss_breakdown/pointer_loss": 0.07752637565135956, "step": 6080 }, { "epoch": 0.620004843787842, "loss_breakdown/lm_loss": 5.94848506807466e-06, "loss_breakdown/pointer_loss": 0.12689943611621857, "step": 6080 }, { "epoch": 0.620004843787842, "loss_breakdown/lm_loss": 1.7721738913678564e-05, "loss_breakdown/pointer_loss": 0.24038361012935638, "step": 6080 }, { "epoch": 0.620004843787842, "loss_breakdown/lm_loss": 8.356406397069804e-06, "loss_breakdown/pointer_loss": 0.18890094757080078, "step": 6080 }, { "epoch": 0.6210245885967036, "grad_norm": 7.538540718717486, "learning_rate": 2.105949008498584e-06, "loss": 0.2631, "step": 6090 }, { "epoch": 0.6210245885967036, "loss_breakdown/lm_loss": 5.413261078501819e-06, "loss_breakdown/pointer_loss": 0.22716882824897766, "step": 6090 }, { "epoch": 0.6210245885967036, "loss_breakdown/lm_loss": 5.769654762843857e-06, "loss_breakdown/pointer_loss": 0.6314938068389893, "step": 6090 }, { "epoch": 0.6210245885967036, "loss_breakdown/lm_loss": 7.361283223872306e-06, "loss_breakdown/pointer_loss": 0.1393468677997589, "step": 6090 }, { "epoch": 0.6210245885967036, "loss_breakdown/lm_loss": 4.657070348912384e-06, "loss_breakdown/pointer_loss": 0.3232673406600952, "step": 6090 }, { "epoch": 0.6210245885967036, "loss_breakdown/lm_loss": 6.414644758478971e-06, "loss_breakdown/pointer_loss": 0.5520205497741699, "step": 6090 }, { "epoch": 0.6210245885967036, "loss_breakdown/lm_loss": 5.025820428272709e-06, "loss_breakdown/pointer_loss": 0.20843097567558289, "step": 6090 }, { "epoch": 0.6210245885967036, "loss_breakdown/lm_loss": 6.370943083311431e-06, "loss_breakdown/pointer_loss": 0.09577646851539612, "step": 6090 }, { "epoch": 0.6210245885967036, "loss_breakdown/lm_loss": 6.2544349930249155e-06, "loss_breakdown/pointer_loss": 0.22080421447753906, "step": 6090 }, { "epoch": 0.6220443334055653, "grad_norm": 37.08019523196457, "learning_rate": 2.1002832861189804e-06, "loss": 0.2837, "step": 6100 }, { "epoch": 0.6220443334055653, "loss_breakdown/lm_loss": 3.498867226880975e-05, "loss_breakdown/pointer_loss": 0.7530263662338257, "step": 6100 }, { "epoch": 0.6220443334055653, "loss_breakdown/lm_loss": 1.8508641005610116e-05, "loss_breakdown/pointer_loss": 0.6657706499099731, "step": 6100 }, { "epoch": 0.6220443334055653, "loss_breakdown/lm_loss": 8.626914677734021e-06, "loss_breakdown/pointer_loss": 0.935853123664856, "step": 6100 }, { "epoch": 0.6220443334055653, "loss_breakdown/lm_loss": 1.1998248737654649e-05, "loss_breakdown/pointer_loss": 0.4462956190109253, "step": 6100 }, { "epoch": 0.6220443334055653, "loss_breakdown/lm_loss": 7.453694252035348e-06, "loss_breakdown/pointer_loss": 0.249121755361557, "step": 6100 }, { "epoch": 0.6220443334055653, "loss_breakdown/lm_loss": 8.89602324605221e-06, "loss_breakdown/pointer_loss": 0.6003033518791199, "step": 6100 }, { "epoch": 0.6220443334055653, "loss_breakdown/lm_loss": 1.3388715160544962e-05, "loss_breakdown/pointer_loss": 1.1060690879821777, "step": 6100 }, { "epoch": 0.6220443334055653, "loss_breakdown/lm_loss": 7.627846116520232e-06, "loss_breakdown/pointer_loss": 0.8786249160766602, "step": 6100 }, { "epoch": 0.6230640782144269, "grad_norm": 3.3137702090509014, "learning_rate": 2.094617563739377e-06, "loss": 0.2813, "step": 6110 }, { "epoch": 0.6230640782144269, "loss_breakdown/lm_loss": 5.216014869802166e-06, "loss_breakdown/pointer_loss": 0.12516170740127563, "step": 6110 }, { "epoch": 0.6230640782144269, "loss_breakdown/lm_loss": 6.059717179596191e-06, "loss_breakdown/pointer_loss": 0.12331065535545349, "step": 6110 }, { "epoch": 0.6230640782144269, "loss_breakdown/lm_loss": 5.027941369917244e-06, "loss_breakdown/pointer_loss": 0.06321832537651062, "step": 6110 }, { "epoch": 0.6230640782144269, "loss_breakdown/lm_loss": 6.025965376466047e-06, "loss_breakdown/pointer_loss": 0.15151891112327576, "step": 6110 }, { "epoch": 0.6230640782144269, "loss_breakdown/lm_loss": 6.9935244937369134e-06, "loss_breakdown/pointer_loss": 0.20052368938922882, "step": 6110 }, { "epoch": 0.6230640782144269, "loss_breakdown/lm_loss": 5.066347966931062e-06, "loss_breakdown/pointer_loss": 0.18047893047332764, "step": 6110 }, { "epoch": 0.6230640782144269, "loss_breakdown/lm_loss": 4.851776793657336e-06, "loss_breakdown/pointer_loss": 0.11320440471172333, "step": 6110 }, { "epoch": 0.6230640782144269, "loss_breakdown/lm_loss": 7.565711257484509e-06, "loss_breakdown/pointer_loss": 0.5360057950019836, "step": 6110 }, { "epoch": 0.6240838230232885, "grad_norm": 3.9573578538951346, "learning_rate": 2.0889518413597734e-06, "loss": 0.2938, "step": 6120 }, { "epoch": 0.6240838230232885, "loss_breakdown/lm_loss": 6.306173872872023e-06, "loss_breakdown/pointer_loss": 0.4868611693382263, "step": 6120 }, { "epoch": 0.6240838230232885, "loss_breakdown/lm_loss": 5.6517346820328385e-06, "loss_breakdown/pointer_loss": 0.27369633316993713, "step": 6120 }, { "epoch": 0.6240838230232885, "loss_breakdown/lm_loss": 1.0948447197733913e-05, "loss_breakdown/pointer_loss": 0.4501747190952301, "step": 6120 }, { "epoch": 0.6240838230232885, "loss_breakdown/lm_loss": 6.964376098039793e-06, "loss_breakdown/pointer_loss": 0.08673176169395447, "step": 6120 }, { "epoch": 0.6240838230232885, "loss_breakdown/lm_loss": 5.398653229349293e-06, "loss_breakdown/pointer_loss": 1.219468116760254, "step": 6120 }, { "epoch": 0.6240838230232885, "loss_breakdown/lm_loss": 5.759386112913489e-06, "loss_breakdown/pointer_loss": 0.5306957364082336, "step": 6120 }, { "epoch": 0.6240838230232885, "loss_breakdown/lm_loss": 4.908398750558263e-06, "loss_breakdown/pointer_loss": 0.2782500088214874, "step": 6120 }, { "epoch": 0.6240838230232885, "loss_breakdown/lm_loss": 4.637996880774153e-06, "loss_breakdown/pointer_loss": 0.41794389486312866, "step": 6120 }, { "epoch": 0.62510356783215, "grad_norm": 10.320766602422285, "learning_rate": 2.0832861189801703e-06, "loss": 0.2835, "step": 6130 }, { "epoch": 0.62510356783215, "loss_breakdown/lm_loss": 7.200152595032705e-06, "loss_breakdown/pointer_loss": 0.23491495847702026, "step": 6130 }, { "epoch": 0.62510356783215, "loss_breakdown/lm_loss": 1.3732428669754881e-05, "loss_breakdown/pointer_loss": 2.0061376094818115, "step": 6130 }, { "epoch": 0.62510356783215, "loss_breakdown/lm_loss": 7.188216386566637e-06, "loss_breakdown/pointer_loss": 0.27832263708114624, "step": 6130 }, { "epoch": 0.62510356783215, "loss_breakdown/lm_loss": 7.363038093899377e-06, "loss_breakdown/pointer_loss": 0.17571735382080078, "step": 6130 }, { "epoch": 0.62510356783215, "loss_breakdown/lm_loss": 1.3322542145033367e-05, "loss_breakdown/pointer_loss": 0.08169126510620117, "step": 6130 }, { "epoch": 0.62510356783215, "loss_breakdown/lm_loss": 1.0156372809433378e-05, "loss_breakdown/pointer_loss": 0.09955570101737976, "step": 6130 }, { "epoch": 0.62510356783215, "loss_breakdown/lm_loss": 7.569676199636888e-06, "loss_breakdown/pointer_loss": 0.5301381945610046, "step": 6130 }, { "epoch": 0.62510356783215, "loss_breakdown/lm_loss": 1.1034482668037526e-05, "loss_breakdown/pointer_loss": 4.9055914878845215, "step": 6130 }, { "epoch": 0.6261233126410116, "grad_norm": 5.916208438936243, "learning_rate": 2.077620396600567e-06, "loss": 0.284, "step": 6140 }, { "epoch": 0.6261233126410116, "loss_breakdown/lm_loss": 5.894533387618139e-06, "loss_breakdown/pointer_loss": 0.36881697177886963, "step": 6140 }, { "epoch": 0.6261233126410116, "loss_breakdown/lm_loss": 4.8477668315172195e-06, "loss_breakdown/pointer_loss": 0.448411762714386, "step": 6140 }, { "epoch": 0.6261233126410116, "loss_breakdown/lm_loss": 5.928941845922964e-06, "loss_breakdown/pointer_loss": 0.3476358652114868, "step": 6140 }, { "epoch": 0.6261233126410116, "loss_breakdown/lm_loss": 7.976287633937318e-06, "loss_breakdown/pointer_loss": 0.15876346826553345, "step": 6140 }, { "epoch": 0.6261233126410116, "loss_breakdown/lm_loss": 5.0405187721480615e-06, "loss_breakdown/pointer_loss": 0.06139037013053894, "step": 6140 }, { "epoch": 0.6261233126410116, "loss_breakdown/lm_loss": 5.876135674043326e-06, "loss_breakdown/pointer_loss": 0.2939811944961548, "step": 6140 }, { "epoch": 0.6261233126410116, "loss_breakdown/lm_loss": 5.029266503697727e-06, "loss_breakdown/pointer_loss": 0.35640090703964233, "step": 6140 }, { "epoch": 0.6261233126410116, "loss_breakdown/lm_loss": 5.142967893334571e-06, "loss_breakdown/pointer_loss": 0.6072667837142944, "step": 6140 }, { "epoch": 0.6271430574498732, "grad_norm": 11.725171759987076, "learning_rate": 2.0719546742209633e-06, "loss": 0.307, "step": 6150 }, { "epoch": 0.6271430574498732, "loss_breakdown/lm_loss": 1.974111182789784e-05, "loss_breakdown/pointer_loss": 1.1601189374923706, "step": 6150 }, { "epoch": 0.6271430574498732, "loss_breakdown/lm_loss": 1.2095099918951746e-05, "loss_breakdown/pointer_loss": 0.9822225570678711, "step": 6150 }, { "epoch": 0.6271430574498732, "loss_breakdown/lm_loss": 9.941000826074742e-06, "loss_breakdown/pointer_loss": 0.6974454522132874, "step": 6150 }, { "epoch": 0.6271430574498732, "loss_breakdown/lm_loss": 7.423710485454649e-06, "loss_breakdown/pointer_loss": 0.26278451085090637, "step": 6150 }, { "epoch": 0.6271430574498732, "loss_breakdown/lm_loss": 7.729732715233695e-06, "loss_breakdown/pointer_loss": 0.5319589972496033, "step": 6150 }, { "epoch": 0.6271430574498732, "loss_breakdown/lm_loss": 1.0561961062194314e-05, "loss_breakdown/pointer_loss": 0.2727239727973938, "step": 6150 }, { "epoch": 0.6271430574498732, "loss_breakdown/lm_loss": 8.678058293298818e-06, "loss_breakdown/pointer_loss": 2.5555453300476074, "step": 6150 }, { "epoch": 0.6271430574498732, "loss_breakdown/lm_loss": 1.0097950507770292e-05, "loss_breakdown/pointer_loss": 0.5894185304641724, "step": 6150 }, { "epoch": 0.6281628022587348, "grad_norm": 2.7489387664158267, "learning_rate": 2.0662889518413602e-06, "loss": 0.2741, "step": 6160 }, { "epoch": 0.6281628022587348, "loss_breakdown/lm_loss": 6.291400950431125e-06, "loss_breakdown/pointer_loss": 0.24558372795581818, "step": 6160 }, { "epoch": 0.6281628022587348, "loss_breakdown/lm_loss": 9.212682925863191e-06, "loss_breakdown/pointer_loss": 0.3130542039871216, "step": 6160 }, { "epoch": 0.6281628022587348, "loss_breakdown/lm_loss": 5.718580268876394e-06, "loss_breakdown/pointer_loss": 0.40439921617507935, "step": 6160 }, { "epoch": 0.6281628022587348, "loss_breakdown/lm_loss": 5.114037321618525e-06, "loss_breakdown/pointer_loss": 0.10890933871269226, "step": 6160 }, { "epoch": 0.6281628022587348, "loss_breakdown/lm_loss": 6.299447704805061e-06, "loss_breakdown/pointer_loss": 0.1255032867193222, "step": 6160 }, { "epoch": 0.6281628022587348, "loss_breakdown/lm_loss": 6.227959147508955e-06, "loss_breakdown/pointer_loss": 2.7450220584869385, "step": 6160 }, { "epoch": 0.6281628022587348, "loss_breakdown/lm_loss": 8.223242730309721e-06, "loss_breakdown/pointer_loss": 0.07820302248001099, "step": 6160 }, { "epoch": 0.6281628022587348, "loss_breakdown/lm_loss": 7.514054232160561e-06, "loss_breakdown/pointer_loss": 0.23876312375068665, "step": 6160 }, { "epoch": 0.6291825470675964, "grad_norm": 4.0110505510393475, "learning_rate": 2.0606232294617563e-06, "loss": 0.2872, "step": 6170 }, { "epoch": 0.6291825470675964, "loss_breakdown/lm_loss": 6.226709501788719e-06, "loss_breakdown/pointer_loss": 0.14691545069217682, "step": 6170 }, { "epoch": 0.6291825470675964, "loss_breakdown/lm_loss": 5.9206518017163035e-06, "loss_breakdown/pointer_loss": 0.37544259428977966, "step": 6170 }, { "epoch": 0.6291825470675964, "loss_breakdown/lm_loss": 6.141679932625266e-06, "loss_breakdown/pointer_loss": 0.19823794066905975, "step": 6170 }, { "epoch": 0.6291825470675964, "loss_breakdown/lm_loss": 6.808196303609293e-06, "loss_breakdown/pointer_loss": 0.4148562550544739, "step": 6170 }, { "epoch": 0.6291825470675964, "loss_breakdown/lm_loss": 6.056717211322393e-06, "loss_breakdown/pointer_loss": 0.8274734616279602, "step": 6170 }, { "epoch": 0.6291825470675964, "loss_breakdown/lm_loss": 6.51457457934157e-06, "loss_breakdown/pointer_loss": 0.20943930745124817, "step": 6170 }, { "epoch": 0.6291825470675964, "loss_breakdown/lm_loss": 6.898741503391648e-06, "loss_breakdown/pointer_loss": 0.1172143816947937, "step": 6170 }, { "epoch": 0.6291825470675964, "loss_breakdown/lm_loss": 5.480257186718518e-06, "loss_breakdown/pointer_loss": 0.08772613108158112, "step": 6170 }, { "epoch": 0.6302022918764579, "grad_norm": 4.823693799194737, "learning_rate": 2.0549575070821532e-06, "loss": 0.2524, "step": 6180 }, { "epoch": 0.6302022918764579, "loss_breakdown/lm_loss": 5.8279565564589575e-06, "loss_breakdown/pointer_loss": 0.24131189286708832, "step": 6180 }, { "epoch": 0.6302022918764579, "loss_breakdown/lm_loss": 6.3815955400059465e-06, "loss_breakdown/pointer_loss": 2.5134122371673584, "step": 6180 }, { "epoch": 0.6302022918764579, "loss_breakdown/lm_loss": 5.7378811106900685e-06, "loss_breakdown/pointer_loss": 0.07400529086589813, "step": 6180 }, { "epoch": 0.6302022918764579, "loss_breakdown/lm_loss": 1.1443459698057268e-05, "loss_breakdown/pointer_loss": 0.599718451499939, "step": 6180 }, { "epoch": 0.6302022918764579, "loss_breakdown/lm_loss": 9.516723366687074e-06, "loss_breakdown/pointer_loss": 0.41103261709213257, "step": 6180 }, { "epoch": 0.6302022918764579, "loss_breakdown/lm_loss": 6.461067187046865e-06, "loss_breakdown/pointer_loss": 0.10062813758850098, "step": 6180 }, { "epoch": 0.6302022918764579, "loss_breakdown/lm_loss": 8.042517947615124e-06, "loss_breakdown/pointer_loss": 1.9868839979171753, "step": 6180 }, { "epoch": 0.6302022918764579, "loss_breakdown/lm_loss": 6.155103619676083e-06, "loss_breakdown/pointer_loss": 0.11540015041828156, "step": 6180 }, { "epoch": 0.6312220366853195, "grad_norm": 4.455939434638421, "learning_rate": 2.0492917847025497e-06, "loss": 0.2971, "step": 6190 }, { "epoch": 0.6312220366853195, "loss_breakdown/lm_loss": 5.84421968596871e-06, "loss_breakdown/pointer_loss": 0.5780394673347473, "step": 6190 }, { "epoch": 0.6312220366853195, "loss_breakdown/lm_loss": 4.760376214107964e-06, "loss_breakdown/pointer_loss": 0.17248135805130005, "step": 6190 }, { "epoch": 0.6312220366853195, "loss_breakdown/lm_loss": 7.015135452093091e-06, "loss_breakdown/pointer_loss": 0.44428750872612, "step": 6190 }, { "epoch": 0.6312220366853195, "loss_breakdown/lm_loss": 1.4570293387805577e-05, "loss_breakdown/pointer_loss": 0.607571542263031, "step": 6190 }, { "epoch": 0.6312220366853195, "loss_breakdown/lm_loss": 6.529917300213128e-06, "loss_breakdown/pointer_loss": 0.49275943636894226, "step": 6190 }, { "epoch": 0.6312220366853195, "loss_breakdown/lm_loss": 4.790187631442677e-06, "loss_breakdown/pointer_loss": 0.09870754927396774, "step": 6190 }, { "epoch": 0.6312220366853195, "loss_breakdown/lm_loss": 5.501561190612847e-06, "loss_breakdown/pointer_loss": 0.5479604005813599, "step": 6190 }, { "epoch": 0.6312220366853195, "loss_breakdown/lm_loss": 6.540527465404011e-06, "loss_breakdown/pointer_loss": 0.33420950174331665, "step": 6190 }, { "epoch": 0.6322417814941811, "grad_norm": 7.4218758881597715, "learning_rate": 2.0436260623229467e-06, "loss": 0.2725, "step": 6200 }, { "epoch": 0.6322417814941811, "loss_breakdown/lm_loss": 2.2169169824337587e-05, "loss_breakdown/pointer_loss": 1.75954008102417, "step": 6200 }, { "epoch": 0.6322417814941811, "loss_breakdown/lm_loss": 1.7360223864670843e-05, "loss_breakdown/pointer_loss": 0.34845781326293945, "step": 6200 }, { "epoch": 0.6322417814941811, "loss_breakdown/lm_loss": 1.057875942933606e-05, "loss_breakdown/pointer_loss": 0.4591582417488098, "step": 6200 }, { "epoch": 0.6322417814941811, "loss_breakdown/lm_loss": 7.5972420745529234e-06, "loss_breakdown/pointer_loss": 0.35408565402030945, "step": 6200 }, { "epoch": 0.6322417814941811, "loss_breakdown/lm_loss": 8.989320122054778e-06, "loss_breakdown/pointer_loss": 0.308073490858078, "step": 6200 }, { "epoch": 0.6322417814941811, "loss_breakdown/lm_loss": 7.988458492036443e-06, "loss_breakdown/pointer_loss": 0.30494147539138794, "step": 6200 }, { "epoch": 0.6322417814941811, "loss_breakdown/lm_loss": 1.1051619367208332e-05, "loss_breakdown/pointer_loss": 0.48641127347946167, "step": 6200 }, { "epoch": 0.6322417814941811, "loss_breakdown/lm_loss": 1.0215097063337453e-05, "loss_breakdown/pointer_loss": 0.8779714107513428, "step": 6200 }, { "epoch": 0.6332615263030427, "grad_norm": 4.170698878960058, "learning_rate": 2.0379603399433427e-06, "loss": 0.2891, "step": 6210 }, { "epoch": 0.6332615263030427, "loss_breakdown/lm_loss": 5.296784820529865e-06, "loss_breakdown/pointer_loss": 1.2320011854171753, "step": 6210 }, { "epoch": 0.6332615263030427, "loss_breakdown/lm_loss": 6.076431418478023e-06, "loss_breakdown/pointer_loss": 0.9046156406402588, "step": 6210 }, { "epoch": 0.6332615263030427, "loss_breakdown/lm_loss": 5.914987013966311e-06, "loss_breakdown/pointer_loss": 0.1630127727985382, "step": 6210 }, { "epoch": 0.6332615263030427, "loss_breakdown/lm_loss": 6.983992534514982e-06, "loss_breakdown/pointer_loss": 0.05314141511917114, "step": 6210 }, { "epoch": 0.6332615263030427, "loss_breakdown/lm_loss": 6.171010227262741e-06, "loss_breakdown/pointer_loss": 0.3489484488964081, "step": 6210 }, { "epoch": 0.6332615263030427, "loss_breakdown/lm_loss": 4.863694812229369e-06, "loss_breakdown/pointer_loss": 0.07558716833591461, "step": 6210 }, { "epoch": 0.6332615263030427, "loss_breakdown/lm_loss": 6.0398820096452255e-06, "loss_breakdown/pointer_loss": 0.07947950810194016, "step": 6210 }, { "epoch": 0.6332615263030427, "loss_breakdown/lm_loss": 4.601440650731092e-06, "loss_breakdown/pointer_loss": 0.0989251658320427, "step": 6210 }, { "epoch": 0.6342812711119042, "grad_norm": 5.085279777814632, "learning_rate": 2.0322946175637397e-06, "loss": 0.2821, "step": 6220 }, { "epoch": 0.6342812711119042, "loss_breakdown/lm_loss": 1.3101803233439568e-05, "loss_breakdown/pointer_loss": 0.6621273756027222, "step": 6220 }, { "epoch": 0.6342812711119042, "loss_breakdown/lm_loss": 6.021872195560718e-06, "loss_breakdown/pointer_loss": 0.22147800028324127, "step": 6220 }, { "epoch": 0.6342812711119042, "loss_breakdown/lm_loss": 7.072419975884259e-06, "loss_breakdown/pointer_loss": 0.34288227558135986, "step": 6220 }, { "epoch": 0.6342812711119042, "loss_breakdown/lm_loss": 7.890971573942807e-06, "loss_breakdown/pointer_loss": 0.46043774485588074, "step": 6220 }, { "epoch": 0.6342812711119042, "loss_breakdown/lm_loss": 6.638855211349437e-06, "loss_breakdown/pointer_loss": 0.5203759670257568, "step": 6220 }, { "epoch": 0.6342812711119042, "loss_breakdown/lm_loss": 5.301258170220535e-06, "loss_breakdown/pointer_loss": 0.21878653764724731, "step": 6220 }, { "epoch": 0.6342812711119042, "loss_breakdown/lm_loss": 1.2655844329856336e-05, "loss_breakdown/pointer_loss": 0.6296871304512024, "step": 6220 }, { "epoch": 0.6342812711119042, "loss_breakdown/lm_loss": 5.555827556236181e-06, "loss_breakdown/pointer_loss": 0.6852931976318359, "step": 6220 }, { "epoch": 0.6353010159207658, "grad_norm": 8.647750593298154, "learning_rate": 2.026628895184136e-06, "loss": 0.2439, "step": 6230 }, { "epoch": 0.6353010159207658, "loss_breakdown/lm_loss": 5.245150077826111e-06, "loss_breakdown/pointer_loss": 0.095131054520607, "step": 6230 }, { "epoch": 0.6353010159207658, "loss_breakdown/lm_loss": 5.594833965005819e-06, "loss_breakdown/pointer_loss": 2.244105100631714, "step": 6230 }, { "epoch": 0.6353010159207658, "loss_breakdown/lm_loss": 2.5165343686239794e-05, "loss_breakdown/pointer_loss": 0.9399352073669434, "step": 6230 }, { "epoch": 0.6353010159207658, "loss_breakdown/lm_loss": 5.932586191192968e-06, "loss_breakdown/pointer_loss": 0.07937340438365936, "step": 6230 }, { "epoch": 0.6353010159207658, "loss_breakdown/lm_loss": 5.0901890062959865e-06, "loss_breakdown/pointer_loss": 0.03896962106227875, "step": 6230 }, { "epoch": 0.6353010159207658, "loss_breakdown/lm_loss": 5.296817562339129e-06, "loss_breakdown/pointer_loss": 0.11159264296293259, "step": 6230 }, { "epoch": 0.6353010159207658, "loss_breakdown/lm_loss": 7.001457106525777e-06, "loss_breakdown/pointer_loss": 0.20758941769599915, "step": 6230 }, { "epoch": 0.6353010159207658, "loss_breakdown/lm_loss": 7.736542102065869e-06, "loss_breakdown/pointer_loss": 0.14686955511569977, "step": 6230 }, { "epoch": 0.6363207607296274, "grad_norm": 4.362818082648536, "learning_rate": 2.0209631728045327e-06, "loss": 0.2606, "step": 6240 }, { "epoch": 0.6363207607296274, "loss_breakdown/lm_loss": 5.765700279880548e-06, "loss_breakdown/pointer_loss": 0.40598827600479126, "step": 6240 }, { "epoch": 0.6363207607296274, "loss_breakdown/lm_loss": 5.428828444564715e-06, "loss_breakdown/pointer_loss": 0.4242435693740845, "step": 6240 }, { "epoch": 0.6363207607296274, "loss_breakdown/lm_loss": 6.747161933162715e-06, "loss_breakdown/pointer_loss": 0.2521533668041229, "step": 6240 }, { "epoch": 0.6363207607296274, "loss_breakdown/lm_loss": 6.206172656675335e-06, "loss_breakdown/pointer_loss": 1.206597089767456, "step": 6240 }, { "epoch": 0.6363207607296274, "loss_breakdown/lm_loss": 0.0002900093386415392, "loss_breakdown/pointer_loss": 0.18289943039417267, "step": 6240 }, { "epoch": 0.6363207607296274, "loss_breakdown/lm_loss": 5.101393071527127e-06, "loss_breakdown/pointer_loss": 1.553898811340332, "step": 6240 }, { "epoch": 0.6363207607296274, "loss_breakdown/lm_loss": 5.524905645870604e-06, "loss_breakdown/pointer_loss": 0.1756984293460846, "step": 6240 }, { "epoch": 0.6363207607296274, "loss_breakdown/lm_loss": 6.395910986611852e-06, "loss_breakdown/pointer_loss": 0.2962755560874939, "step": 6240 }, { "epoch": 0.637340505538489, "grad_norm": 11.779638068650636, "learning_rate": 2.015297450424929e-06, "loss": 0.2591, "step": 6250 }, { "epoch": 0.637340505538489, "loss_breakdown/lm_loss": 4.9098016461357474e-05, "loss_breakdown/pointer_loss": 0.6803444027900696, "step": 6250 }, { "epoch": 0.637340505538489, "loss_breakdown/lm_loss": 8.581198017054703e-06, "loss_breakdown/pointer_loss": 0.41019487380981445, "step": 6250 }, { "epoch": 0.637340505538489, "loss_breakdown/lm_loss": 1.5442774383700453e-05, "loss_breakdown/pointer_loss": 0.7271348834037781, "step": 6250 }, { "epoch": 0.637340505538489, "loss_breakdown/lm_loss": 1.9859831809299067e-05, "loss_breakdown/pointer_loss": 1.3453832864761353, "step": 6250 }, { "epoch": 0.637340505538489, "loss_breakdown/lm_loss": 9.841357496043202e-06, "loss_breakdown/pointer_loss": 0.7892239093780518, "step": 6250 }, { "epoch": 0.637340505538489, "loss_breakdown/lm_loss": 7.839600584702566e-06, "loss_breakdown/pointer_loss": 0.2686011493206024, "step": 6250 }, { "epoch": 0.637340505538489, "loss_breakdown/lm_loss": 8.270154467027169e-06, "loss_breakdown/pointer_loss": 0.5080864429473877, "step": 6250 }, { "epoch": 0.637340505538489, "loss_breakdown/lm_loss": 6.639512776018819e-06, "loss_breakdown/pointer_loss": 0.12211525440216064, "step": 6250 }, { "epoch": 0.6383602503473506, "grad_norm": 5.697581273706353, "learning_rate": 2.009631728045326e-06, "loss": 0.272, "step": 6260 }, { "epoch": 0.6383602503473506, "loss_breakdown/lm_loss": 6.689559540973278e-06, "loss_breakdown/pointer_loss": 0.28471872210502625, "step": 6260 }, { "epoch": 0.6383602503473506, "loss_breakdown/lm_loss": 9.011988368001767e-06, "loss_breakdown/pointer_loss": 0.020922809839248657, "step": 6260 }, { "epoch": 0.6383602503473506, "loss_breakdown/lm_loss": 6.857107564428588e-06, "loss_breakdown/pointer_loss": 1.0182201862335205, "step": 6260 }, { "epoch": 0.6383602503473506, "loss_breakdown/lm_loss": 6.697192020510556e-06, "loss_breakdown/pointer_loss": 0.08088137954473495, "step": 6260 }, { "epoch": 0.6383602503473506, "loss_breakdown/lm_loss": 4.9789332479122095e-06, "loss_breakdown/pointer_loss": 0.15364482998847961, "step": 6260 }, { "epoch": 0.6383602503473506, "loss_breakdown/lm_loss": 4.810339760297211e-06, "loss_breakdown/pointer_loss": 0.21333828568458557, "step": 6260 }, { "epoch": 0.6383602503473506, "loss_breakdown/lm_loss": 5.441192570287967e-06, "loss_breakdown/pointer_loss": 0.08926768600940704, "step": 6260 }, { "epoch": 0.6383602503473506, "loss_breakdown/lm_loss": 9.298191798734479e-06, "loss_breakdown/pointer_loss": 1.0317786931991577, "step": 6260 }, { "epoch": 0.6393799951562121, "grad_norm": 2.4007937571768094, "learning_rate": 2.0039660056657226e-06, "loss": 0.2807, "step": 6270 }, { "epoch": 0.6393799951562121, "loss_breakdown/lm_loss": 6.3331876845040824e-06, "loss_breakdown/pointer_loss": 0.22760231792926788, "step": 6270 }, { "epoch": 0.6393799951562121, "loss_breakdown/lm_loss": 5.5597001846763305e-06, "loss_breakdown/pointer_loss": 0.12369404733181, "step": 6270 }, { "epoch": 0.6393799951562121, "loss_breakdown/lm_loss": 6.421329089789651e-06, "loss_breakdown/pointer_loss": 0.5068129301071167, "step": 6270 }, { "epoch": 0.6393799951562121, "loss_breakdown/lm_loss": 6.270328412938397e-06, "loss_breakdown/pointer_loss": 0.4648454189300537, "step": 6270 }, { "epoch": 0.6393799951562121, "loss_breakdown/lm_loss": 6.440244760597125e-06, "loss_breakdown/pointer_loss": 0.2003902643918991, "step": 6270 }, { "epoch": 0.6393799951562121, "loss_breakdown/lm_loss": 5.219827471591998e-06, "loss_breakdown/pointer_loss": 0.415080726146698, "step": 6270 }, { "epoch": 0.6393799951562121, "loss_breakdown/lm_loss": 4.519044523476623e-06, "loss_breakdown/pointer_loss": 0.6925761699676514, "step": 6270 }, { "epoch": 0.6393799951562121, "loss_breakdown/lm_loss": 5.049752871855162e-06, "loss_breakdown/pointer_loss": 0.8729845881462097, "step": 6270 }, { "epoch": 0.6403997399650737, "grad_norm": 6.6516459467337, "learning_rate": 1.998300283286119e-06, "loss": 0.2477, "step": 6280 }, { "epoch": 0.6403997399650737, "loss_breakdown/lm_loss": 5.574970600719098e-06, "loss_breakdown/pointer_loss": 0.12161862850189209, "step": 6280 }, { "epoch": 0.6403997399650737, "loss_breakdown/lm_loss": 6.242523340915795e-06, "loss_breakdown/pointer_loss": 0.750816822052002, "step": 6280 }, { "epoch": 0.6403997399650737, "loss_breakdown/lm_loss": 6.03987928116112e-06, "loss_breakdown/pointer_loss": 1.3253178596496582, "step": 6280 }, { "epoch": 0.6403997399650737, "loss_breakdown/lm_loss": 5.626623078569537e-06, "loss_breakdown/pointer_loss": 0.13524779677391052, "step": 6280 }, { "epoch": 0.6403997399650737, "loss_breakdown/lm_loss": 5.475621492223581e-06, "loss_breakdown/pointer_loss": 0.11241380870342255, "step": 6280 }, { "epoch": 0.6403997399650737, "loss_breakdown/lm_loss": 5.06635205965722e-06, "loss_breakdown/pointer_loss": 0.08435545861721039, "step": 6280 }, { "epoch": 0.6403997399650737, "loss_breakdown/lm_loss": 5.6822473197826184e-06, "loss_breakdown/pointer_loss": 2.0084099769592285, "step": 6280 }, { "epoch": 0.6403997399650737, "loss_breakdown/lm_loss": 5.257083557808073e-06, "loss_breakdown/pointer_loss": 0.06240713596343994, "step": 6280 }, { "epoch": 0.6414194847739353, "grad_norm": 8.784613188567084, "learning_rate": 1.9926345609065156e-06, "loss": 0.2859, "step": 6290 }, { "epoch": 0.6414194847739353, "loss_breakdown/lm_loss": 6.222076990525238e-06, "loss_breakdown/pointer_loss": 2.281049966812134, "step": 6290 }, { "epoch": 0.6414194847739353, "loss_breakdown/lm_loss": 5.543751740333391e-06, "loss_breakdown/pointer_loss": 0.28245478868484497, "step": 6290 }, { "epoch": 0.6414194847739353, "loss_breakdown/lm_loss": 6.558605036843801e-06, "loss_breakdown/pointer_loss": 0.47883960604667664, "step": 6290 }, { "epoch": 0.6414194847739353, "loss_breakdown/lm_loss": 7.4118033808190376e-06, "loss_breakdown/pointer_loss": 0.17188045382499695, "step": 6290 }, { "epoch": 0.6414194847739353, "loss_breakdown/lm_loss": 5.792369847767986e-06, "loss_breakdown/pointer_loss": 0.21233177185058594, "step": 6290 }, { "epoch": 0.6414194847739353, "loss_breakdown/lm_loss": 4.982903647032799e-06, "loss_breakdown/pointer_loss": 0.3498753309249878, "step": 6290 }, { "epoch": 0.6414194847739353, "loss_breakdown/lm_loss": 7.1372792262991425e-06, "loss_breakdown/pointer_loss": 0.22369015216827393, "step": 6290 }, { "epoch": 0.6414194847739353, "loss_breakdown/lm_loss": 5.39880556971184e-06, "loss_breakdown/pointer_loss": 0.12376675754785538, "step": 6290 }, { "epoch": 0.6424392295827969, "grad_norm": 8.039035961726047, "learning_rate": 1.986968838526912e-06, "loss": 0.2557, "step": 6300 }, { "epoch": 0.6424392295827969, "loss_breakdown/lm_loss": 3.478348662611097e-05, "loss_breakdown/pointer_loss": 2.1478703022003174, "step": 6300 }, { "epoch": 0.6424392295827969, "loss_breakdown/lm_loss": 1.755518496793229e-05, "loss_breakdown/pointer_loss": 0.5762457847595215, "step": 6300 }, { "epoch": 0.6424392295827969, "loss_breakdown/lm_loss": 9.813574251893442e-06, "loss_breakdown/pointer_loss": 0.9227001070976257, "step": 6300 }, { "epoch": 0.6424392295827969, "loss_breakdown/lm_loss": 7.904224730737042e-06, "loss_breakdown/pointer_loss": 0.5632427930831909, "step": 6300 }, { "epoch": 0.6424392295827969, "loss_breakdown/lm_loss": 9.94867968984181e-06, "loss_breakdown/pointer_loss": 0.40109190344810486, "step": 6300 }, { "epoch": 0.6424392295827969, "loss_breakdown/lm_loss": 7.690636266488582e-06, "loss_breakdown/pointer_loss": 0.5073163509368896, "step": 6300 }, { "epoch": 0.6424392295827969, "loss_breakdown/lm_loss": 6.337257218547165e-06, "loss_breakdown/pointer_loss": 0.3142641484737396, "step": 6300 }, { "epoch": 0.6424392295827969, "loss_breakdown/lm_loss": 1.4479320270766038e-05, "loss_breakdown/pointer_loss": 0.31270575523376465, "step": 6300 }, { "epoch": 0.6434589743916584, "grad_norm": 3.2857217935687184, "learning_rate": 1.981303116147309e-06, "loss": 0.2545, "step": 6310 }, { "epoch": 0.6434589743916584, "loss_breakdown/lm_loss": 5.419992703536991e-06, "loss_breakdown/pointer_loss": 0.06614696979522705, "step": 6310 }, { "epoch": 0.6434589743916584, "loss_breakdown/lm_loss": 6.149151886347681e-06, "loss_breakdown/pointer_loss": 0.8424389958381653, "step": 6310 }, { "epoch": 0.6434589743916584, "loss_breakdown/lm_loss": 5.055761448602425e-06, "loss_breakdown/pointer_loss": 0.26548153162002563, "step": 6310 }, { "epoch": 0.6434589743916584, "loss_breakdown/lm_loss": 8.284875548270065e-06, "loss_breakdown/pointer_loss": 0.24217940866947174, "step": 6310 }, { "epoch": 0.6434589743916584, "loss_breakdown/lm_loss": 5.980269179417519e-06, "loss_breakdown/pointer_loss": 0.27222684025764465, "step": 6310 }, { "epoch": 0.6434589743916584, "loss_breakdown/lm_loss": 5.098140718473587e-06, "loss_breakdown/pointer_loss": 0.2145242989063263, "step": 6310 }, { "epoch": 0.6434589743916584, "loss_breakdown/lm_loss": 4.4782605073123705e-06, "loss_breakdown/pointer_loss": 0.4715810716152191, "step": 6310 }, { "epoch": 0.6434589743916584, "loss_breakdown/lm_loss": 5.7537740758561995e-06, "loss_breakdown/pointer_loss": 0.1602301150560379, "step": 6310 }, { "epoch": 0.64447871920052, "grad_norm": 3.8745659551448832, "learning_rate": 1.9756373937677056e-06, "loss": 0.3056, "step": 6320 }, { "epoch": 0.64447871920052, "loss_breakdown/lm_loss": 6.925231900822837e-06, "loss_breakdown/pointer_loss": 0.3995998203754425, "step": 6320 }, { "epoch": 0.64447871920052, "loss_breakdown/lm_loss": 6.711173682560911e-06, "loss_breakdown/pointer_loss": 0.3218068778514862, "step": 6320 }, { "epoch": 0.64447871920052, "loss_breakdown/lm_loss": 6.670622042292962e-06, "loss_breakdown/pointer_loss": 0.25769278407096863, "step": 6320 }, { "epoch": 0.64447871920052, "loss_breakdown/lm_loss": 6.720968031004304e-06, "loss_breakdown/pointer_loss": 0.3568479120731354, "step": 6320 }, { "epoch": 0.64447871920052, "loss_breakdown/lm_loss": 5.5092496040742844e-06, "loss_breakdown/pointer_loss": 0.2431681752204895, "step": 6320 }, { "epoch": 0.64447871920052, "loss_breakdown/lm_loss": 6.6097022681788076e-06, "loss_breakdown/pointer_loss": 0.24859634041786194, "step": 6320 }, { "epoch": 0.64447871920052, "loss_breakdown/lm_loss": 7.086718596838182e-06, "loss_breakdown/pointer_loss": 0.5609067678451538, "step": 6320 }, { "epoch": 0.64447871920052, "loss_breakdown/lm_loss": 5.8734126469062176e-06, "loss_breakdown/pointer_loss": 0.45268356800079346, "step": 6320 }, { "epoch": 0.6454984640093816, "grad_norm": 9.966176528390843, "learning_rate": 1.969971671388102e-06, "loss": 0.2399, "step": 6330 }, { "epoch": 0.6454984640093816, "loss_breakdown/lm_loss": 7.6292885751172435e-06, "loss_breakdown/pointer_loss": 1.7704814672470093, "step": 6330 }, { "epoch": 0.6454984640093816, "loss_breakdown/lm_loss": 5.9882213463424705e-06, "loss_breakdown/pointer_loss": 1.1618187427520752, "step": 6330 }, { "epoch": 0.6454984640093816, "loss_breakdown/lm_loss": 6.270318408496678e-06, "loss_breakdown/pointer_loss": 0.17669682204723358, "step": 6330 }, { "epoch": 0.6454984640093816, "loss_breakdown/lm_loss": 6.7074229264108e-06, "loss_breakdown/pointer_loss": 0.048676036298274994, "step": 6330 }, { "epoch": 0.6454984640093816, "loss_breakdown/lm_loss": 6.174965619720751e-06, "loss_breakdown/pointer_loss": 0.19808240234851837, "step": 6330 }, { "epoch": 0.6454984640093816, "loss_breakdown/lm_loss": 7.73647116147913e-06, "loss_breakdown/pointer_loss": 0.10200545191764832, "step": 6330 }, { "epoch": 0.6454984640093816, "loss_breakdown/lm_loss": 8.17498494143365e-06, "loss_breakdown/pointer_loss": 1.093803882598877, "step": 6330 }, { "epoch": 0.6454984640093816, "loss_breakdown/lm_loss": 1.7506807125755586e-05, "loss_breakdown/pointer_loss": 0.17681749165058136, "step": 6330 }, { "epoch": 0.6465182088182433, "grad_norm": 6.963287426901031, "learning_rate": 1.9643059490084986e-06, "loss": 0.2816, "step": 6340 }, { "epoch": 0.6465182088182433, "loss_breakdown/lm_loss": 5.179752861295128e-06, "loss_breakdown/pointer_loss": 0.15827760100364685, "step": 6340 }, { "epoch": 0.6465182088182433, "loss_breakdown/lm_loss": 6.849947567388881e-06, "loss_breakdown/pointer_loss": 0.18680621683597565, "step": 6340 }, { "epoch": 0.6465182088182433, "loss_breakdown/lm_loss": 7.783067303535063e-06, "loss_breakdown/pointer_loss": 0.35178136825561523, "step": 6340 }, { "epoch": 0.6465182088182433, "loss_breakdown/lm_loss": 5.240397513261996e-06, "loss_breakdown/pointer_loss": 0.3923400938510895, "step": 6340 }, { "epoch": 0.6465182088182433, "loss_breakdown/lm_loss": 5.900790256418986e-06, "loss_breakdown/pointer_loss": 0.10110196471214294, "step": 6340 }, { "epoch": 0.6465182088182433, "loss_breakdown/lm_loss": 1.4557365830114577e-05, "loss_breakdown/pointer_loss": 0.20941171050071716, "step": 6340 }, { "epoch": 0.6465182088182433, "loss_breakdown/lm_loss": 7.043196092126891e-06, "loss_breakdown/pointer_loss": 0.15609610080718994, "step": 6340 }, { "epoch": 0.6465182088182433, "loss_breakdown/lm_loss": 9.65266372077167e-06, "loss_breakdown/pointer_loss": 1.0483635663986206, "step": 6340 }, { "epoch": 0.6475379536271049, "grad_norm": 11.275850791711296, "learning_rate": 1.9586402266288955e-06, "loss": 0.2714, "step": 6350 }, { "epoch": 0.6475379536271049, "loss_breakdown/lm_loss": 3.370703780092299e-05, "loss_breakdown/pointer_loss": 2.318848133087158, "step": 6350 }, { "epoch": 0.6475379536271049, "loss_breakdown/lm_loss": 1.3572755960922223e-05, "loss_breakdown/pointer_loss": 1.1137256622314453, "step": 6350 }, { "epoch": 0.6475379536271049, "loss_breakdown/lm_loss": 2.5955663659260608e-05, "loss_breakdown/pointer_loss": 0.6603063941001892, "step": 6350 }, { "epoch": 0.6475379536271049, "loss_breakdown/lm_loss": 9.914891961670946e-06, "loss_breakdown/pointer_loss": 0.29009154438972473, "step": 6350 }, { "epoch": 0.6475379536271049, "loss_breakdown/lm_loss": 1.9361996237421408e-05, "loss_breakdown/pointer_loss": 0.6300808191299438, "step": 6350 }, { "epoch": 0.6475379536271049, "loss_breakdown/lm_loss": 1.0693123840610497e-05, "loss_breakdown/pointer_loss": 0.47848373651504517, "step": 6350 }, { "epoch": 0.6475379536271049, "loss_breakdown/lm_loss": 7.143277343857335e-06, "loss_breakdown/pointer_loss": 0.4673697054386139, "step": 6350 }, { "epoch": 0.6475379536271049, "loss_breakdown/lm_loss": 7.060447842377471e-06, "loss_breakdown/pointer_loss": 0.7013375759124756, "step": 6350 }, { "epoch": 0.6485576984359664, "grad_norm": 6.11314104133247, "learning_rate": 1.952974504249292e-06, "loss": 0.2671, "step": 6360 }, { "epoch": 0.6485576984359664, "loss_breakdown/lm_loss": 6.540497452078853e-06, "loss_breakdown/pointer_loss": 0.2407076209783554, "step": 6360 }, { "epoch": 0.6485576984359664, "loss_breakdown/lm_loss": 4.4762778088625055e-06, "loss_breakdown/pointer_loss": 0.5764071345329285, "step": 6360 }, { "epoch": 0.6485576984359664, "loss_breakdown/lm_loss": 7.1230283538170625e-06, "loss_breakdown/pointer_loss": 0.16016189754009247, "step": 6360 }, { "epoch": 0.6485576984359664, "loss_breakdown/lm_loss": 6.639880666625686e-06, "loss_breakdown/pointer_loss": 0.16321951150894165, "step": 6360 }, { "epoch": 0.6485576984359664, "loss_breakdown/lm_loss": 5.724252332584001e-06, "loss_breakdown/pointer_loss": 0.21931597590446472, "step": 6360 }, { "epoch": 0.6485576984359664, "loss_breakdown/lm_loss": 6.850488716736436e-06, "loss_breakdown/pointer_loss": 0.3658394515514374, "step": 6360 }, { "epoch": 0.6485576984359664, "loss_breakdown/lm_loss": 7.659474249521736e-06, "loss_breakdown/pointer_loss": 0.13397520780563354, "step": 6360 }, { "epoch": 0.6485576984359664, "loss_breakdown/lm_loss": 7.458416803274304e-06, "loss_breakdown/pointer_loss": 0.1385141909122467, "step": 6360 }, { "epoch": 0.649577443244828, "grad_norm": 3.971374440627976, "learning_rate": 1.9473087818696885e-06, "loss": 0.274, "step": 6370 }, { "epoch": 0.649577443244828, "loss_breakdown/lm_loss": 8.070987860264722e-06, "loss_breakdown/pointer_loss": 1.3082627058029175, "step": 6370 }, { "epoch": 0.649577443244828, "loss_breakdown/lm_loss": 8.034231541387271e-06, "loss_breakdown/pointer_loss": 0.8420511484146118, "step": 6370 }, { "epoch": 0.649577443244828, "loss_breakdown/lm_loss": 7.847777851566207e-06, "loss_breakdown/pointer_loss": 0.35088419914245605, "step": 6370 }, { "epoch": 0.649577443244828, "loss_breakdown/lm_loss": 1.1244505003560334e-05, "loss_breakdown/pointer_loss": 0.30547937750816345, "step": 6370 }, { "epoch": 0.649577443244828, "loss_breakdown/lm_loss": 9.860887985269073e-06, "loss_breakdown/pointer_loss": 0.23139716684818268, "step": 6370 }, { "epoch": 0.649577443244828, "loss_breakdown/lm_loss": 6.461742486862931e-06, "loss_breakdown/pointer_loss": 0.7235090732574463, "step": 6370 }, { "epoch": 0.649577443244828, "loss_breakdown/lm_loss": 7.98389100964414e-06, "loss_breakdown/pointer_loss": 0.32010331749916077, "step": 6370 }, { "epoch": 0.649577443244828, "loss_breakdown/lm_loss": 8.08845379651757e-06, "loss_breakdown/pointer_loss": 1.151379942893982, "step": 6370 }, { "epoch": 0.6505971880536896, "grad_norm": 8.381556778284908, "learning_rate": 1.941643059490085e-06, "loss": 0.2646, "step": 6380 }, { "epoch": 0.6505971880536896, "loss_breakdown/lm_loss": 1.2297972716623917e-05, "loss_breakdown/pointer_loss": 0.055172406136989594, "step": 6380 }, { "epoch": 0.6505971880536896, "loss_breakdown/lm_loss": 2.271863013447728e-05, "loss_breakdown/pointer_loss": 3.744475841522217, "step": 6380 }, { "epoch": 0.6505971880536896, "loss_breakdown/lm_loss": 6.94982099957997e-06, "loss_breakdown/pointer_loss": 0.48044878244400024, "step": 6380 }, { "epoch": 0.6505971880536896, "loss_breakdown/lm_loss": 6.949817361601163e-06, "loss_breakdown/pointer_loss": 0.46122434735298157, "step": 6380 }, { "epoch": 0.6505971880536896, "loss_breakdown/lm_loss": 5.082240477349842e-06, "loss_breakdown/pointer_loss": 0.15274573862552643, "step": 6380 }, { "epoch": 0.6505971880536896, "loss_breakdown/lm_loss": 5.110058282298269e-06, "loss_breakdown/pointer_loss": 0.02370205521583557, "step": 6380 }, { "epoch": 0.6505971880536896, "loss_breakdown/lm_loss": 8.110037015285343e-06, "loss_breakdown/pointer_loss": 0.13163700699806213, "step": 6380 }, { "epoch": 0.6505971880536896, "loss_breakdown/lm_loss": 7.919342351669911e-06, "loss_breakdown/pointer_loss": 0.14931263029575348, "step": 6380 }, { "epoch": 0.6516169328625512, "grad_norm": 20.857215995451472, "learning_rate": 1.935977337110482e-06, "loss": 0.2937, "step": 6390 }, { "epoch": 0.6516169328625512, "loss_breakdown/lm_loss": 6.49428602628177e-06, "loss_breakdown/pointer_loss": 1.1064568758010864, "step": 6390 }, { "epoch": 0.6516169328625512, "loss_breakdown/lm_loss": 6.4195419327006675e-06, "loss_breakdown/pointer_loss": 0.18493688106536865, "step": 6390 }, { "epoch": 0.6516169328625512, "loss_breakdown/lm_loss": 5.821998001920292e-06, "loss_breakdown/pointer_loss": 0.3697413504123688, "step": 6390 }, { "epoch": 0.6516169328625512, "loss_breakdown/lm_loss": 5.383417828852544e-06, "loss_breakdown/pointer_loss": 0.2855495810508728, "step": 6390 }, { "epoch": 0.6516169328625512, "loss_breakdown/lm_loss": 5.4807314882054925e-06, "loss_breakdown/pointer_loss": 0.2000894397497177, "step": 6390 }, { "epoch": 0.6516169328625512, "loss_breakdown/lm_loss": 5.410913217929192e-06, "loss_breakdown/pointer_loss": 0.37751689553260803, "step": 6390 }, { "epoch": 0.6516169328625512, "loss_breakdown/lm_loss": 6.629179551964626e-06, "loss_breakdown/pointer_loss": 0.1795324981212616, "step": 6390 }, { "epoch": 0.6516169328625512, "loss_breakdown/lm_loss": 5.606309514405439e-06, "loss_breakdown/pointer_loss": 0.24543875455856323, "step": 6390 }, { "epoch": 0.6526366776714128, "grad_norm": 156.01044794137928, "learning_rate": 1.9303116147308784e-06, "loss": 0.2613, "step": 6400 }, { "epoch": 0.6526366776714128, "loss_breakdown/lm_loss": 2.0342211428214796e-05, "loss_breakdown/pointer_loss": 1.7667384147644043, "step": 6400 }, { "epoch": 0.6526366776714128, "loss_breakdown/lm_loss": 2.4039736672420986e-05, "loss_breakdown/pointer_loss": 1.9271975755691528, "step": 6400 }, { "epoch": 0.6526366776714128, "loss_breakdown/lm_loss": 1.1273497875663452e-05, "loss_breakdown/pointer_loss": 1.132148265838623, "step": 6400 }, { "epoch": 0.6526366776714128, "loss_breakdown/lm_loss": 1.0503290468477644e-05, "loss_breakdown/pointer_loss": 1.1336973905563354, "step": 6400 }, { "epoch": 0.6526366776714128, "loss_breakdown/lm_loss": 6.318763553281315e-06, "loss_breakdown/pointer_loss": 0.7440086603164673, "step": 6400 }, { "epoch": 0.6526366776714128, "loss_breakdown/lm_loss": 7.428704520862084e-06, "loss_breakdown/pointer_loss": 0.2951987385749817, "step": 6400 }, { "epoch": 0.6526366776714128, "loss_breakdown/lm_loss": 9.36011019803118e-06, "loss_breakdown/pointer_loss": 0.8562972545623779, "step": 6400 }, { "epoch": 0.6526366776714128, "loss_breakdown/lm_loss": 9.054102520167362e-06, "loss_breakdown/pointer_loss": 0.5019588470458984, "step": 6400 }, { "epoch": 0.6536564224802743, "grad_norm": 12.651744822749254, "learning_rate": 1.924645892351275e-06, "loss": 0.2768, "step": 6410 }, { "epoch": 0.6536564224802743, "loss_breakdown/lm_loss": 7.10239464751794e-06, "loss_breakdown/pointer_loss": 0.2473250925540924, "step": 6410 }, { "epoch": 0.6536564224802743, "loss_breakdown/lm_loss": 5.446497198136058e-06, "loss_breakdown/pointer_loss": 0.3248366117477417, "step": 6410 }, { "epoch": 0.6536564224802743, "loss_breakdown/lm_loss": 6.252406819839962e-06, "loss_breakdown/pointer_loss": 0.166958749294281, "step": 6410 }, { "epoch": 0.6536564224802743, "loss_breakdown/lm_loss": 6.753952220606152e-06, "loss_breakdown/pointer_loss": 0.48104673624038696, "step": 6410 }, { "epoch": 0.6536564224802743, "loss_breakdown/lm_loss": 7.041213848424377e-06, "loss_breakdown/pointer_loss": 0.18262748420238495, "step": 6410 }, { "epoch": 0.6536564224802743, "loss_breakdown/lm_loss": 1.0201148143096361e-05, "loss_breakdown/pointer_loss": 1.575016736984253, "step": 6410 }, { "epoch": 0.6536564224802743, "loss_breakdown/lm_loss": 7.3113824328174815e-06, "loss_breakdown/pointer_loss": 0.21687954664230347, "step": 6410 }, { "epoch": 0.6536564224802743, "loss_breakdown/lm_loss": 5.388211320678238e-06, "loss_breakdown/pointer_loss": 0.11457908898591995, "step": 6410 }, { "epoch": 0.6546761672891359, "grad_norm": 4.30152156499631, "learning_rate": 1.9189801699716714e-06, "loss": 0.279, "step": 6420 }, { "epoch": 0.6546761672891359, "loss_breakdown/lm_loss": 7.667712452530395e-06, "loss_breakdown/pointer_loss": 0.3861234188079834, "step": 6420 }, { "epoch": 0.6546761672891359, "loss_breakdown/lm_loss": 6.133436272648396e-06, "loss_breakdown/pointer_loss": 0.3900474011898041, "step": 6420 }, { "epoch": 0.6546761672891359, "loss_breakdown/lm_loss": 6.746356575604295e-06, "loss_breakdown/pointer_loss": 0.4552251100540161, "step": 6420 }, { "epoch": 0.6546761672891359, "loss_breakdown/lm_loss": 1.040133156493539e-05, "loss_breakdown/pointer_loss": 0.7180193066596985, "step": 6420 }, { "epoch": 0.6546761672891359, "loss_breakdown/lm_loss": 6.67279618937755e-06, "loss_breakdown/pointer_loss": 0.810696542263031, "step": 6420 }, { "epoch": 0.6546761672891359, "loss_breakdown/lm_loss": 5.63455523661105e-06, "loss_breakdown/pointer_loss": 0.32563307881355286, "step": 6420 }, { "epoch": 0.6546761672891359, "loss_breakdown/lm_loss": 6.078283149690833e-06, "loss_breakdown/pointer_loss": 0.5343909859657288, "step": 6420 }, { "epoch": 0.6546761672891359, "loss_breakdown/lm_loss": 6.178375315357698e-06, "loss_breakdown/pointer_loss": 0.4996339678764343, "step": 6420 }, { "epoch": 0.6556959120979975, "grad_norm": 8.7608818843057, "learning_rate": 1.913314447592068e-06, "loss": 0.2467, "step": 6430 }, { "epoch": 0.6556959120979975, "loss_breakdown/lm_loss": 5.495492132467916e-06, "loss_breakdown/pointer_loss": 0.36876142024993896, "step": 6430 }, { "epoch": 0.6556959120979975, "loss_breakdown/lm_loss": 7.100816219463013e-06, "loss_breakdown/pointer_loss": 2.387342929840088, "step": 6430 }, { "epoch": 0.6556959120979975, "loss_breakdown/lm_loss": 8.515305125911254e-06, "loss_breakdown/pointer_loss": 0.270599365234375, "step": 6430 }, { "epoch": 0.6556959120979975, "loss_breakdown/lm_loss": 5.5908617468958255e-06, "loss_breakdown/pointer_loss": 3.131171941757202, "step": 6430 }, { "epoch": 0.6556959120979975, "loss_breakdown/lm_loss": 7.001472567935707e-06, "loss_breakdown/pointer_loss": 1.9740571975708008, "step": 6430 }, { "epoch": 0.6556959120979975, "loss_breakdown/lm_loss": 5.9087437875859905e-06, "loss_breakdown/pointer_loss": 0.03900881111621857, "step": 6430 }, { "epoch": 0.6556959120979975, "loss_breakdown/lm_loss": 3.3091575460275635e-05, "loss_breakdown/pointer_loss": 0.15372338891029358, "step": 6430 }, { "epoch": 0.6556959120979975, "loss_breakdown/lm_loss": 1.0756347364804242e-05, "loss_breakdown/pointer_loss": 0.22271591424942017, "step": 6430 }, { "epoch": 0.6567156569068591, "grad_norm": 3.353378940125757, "learning_rate": 1.907648725212465e-06, "loss": 0.2757, "step": 6440 }, { "epoch": 0.6567156569068591, "loss_breakdown/lm_loss": 7.559654932265403e-06, "loss_breakdown/pointer_loss": 0.16625286638736725, "step": 6440 }, { "epoch": 0.6567156569068591, "loss_breakdown/lm_loss": 6.843280061730184e-06, "loss_breakdown/pointer_loss": 0.18009310960769653, "step": 6440 }, { "epoch": 0.6567156569068591, "loss_breakdown/lm_loss": 6.463501904363511e-06, "loss_breakdown/pointer_loss": 0.8524212837219238, "step": 6440 }, { "epoch": 0.6567156569068591, "loss_breakdown/lm_loss": 6.220010163815459e-06, "loss_breakdown/pointer_loss": 0.22982320189476013, "step": 6440 }, { "epoch": 0.6567156569068591, "loss_breakdown/lm_loss": 6.537646186188795e-06, "loss_breakdown/pointer_loss": 0.1200486496090889, "step": 6440 }, { "epoch": 0.6567156569068591, "loss_breakdown/lm_loss": 5.015404894948006e-06, "loss_breakdown/pointer_loss": 0.10986949503421783, "step": 6440 }, { "epoch": 0.6567156569068591, "loss_breakdown/lm_loss": 5.8782870837603696e-06, "loss_breakdown/pointer_loss": 0.14314453303813934, "step": 6440 }, { "epoch": 0.6567156569068591, "loss_breakdown/lm_loss": 5.333907665772131e-06, "loss_breakdown/pointer_loss": 0.4556909203529358, "step": 6440 }, { "epoch": 0.6577354017157206, "grad_norm": 9.510372486873635, "learning_rate": 1.9019830028328614e-06, "loss": 0.2453, "step": 6450 }, { "epoch": 0.6577354017157206, "loss_breakdown/lm_loss": 3.751884287339635e-05, "loss_breakdown/pointer_loss": 2.830597162246704, "step": 6450 }, { "epoch": 0.6577354017157206, "loss_breakdown/lm_loss": 1.5644041923223995e-05, "loss_breakdown/pointer_loss": 0.7504557371139526, "step": 6450 }, { "epoch": 0.6577354017157206, "loss_breakdown/lm_loss": 2.601023152237758e-05, "loss_breakdown/pointer_loss": 1.415018081665039, "step": 6450 }, { "epoch": 0.6577354017157206, "loss_breakdown/lm_loss": 1.0144247426069342e-05, "loss_breakdown/pointer_loss": 0.36258476972579956, "step": 6450 }, { "epoch": 0.6577354017157206, "loss_breakdown/lm_loss": 8.596821317041758e-06, "loss_breakdown/pointer_loss": 0.3543083071708679, "step": 6450 }, { "epoch": 0.6577354017157206, "loss_breakdown/lm_loss": 1.3544354260375258e-05, "loss_breakdown/pointer_loss": 1.2102060317993164, "step": 6450 }, { "epoch": 0.6577354017157206, "loss_breakdown/lm_loss": 8.999937563203275e-06, "loss_breakdown/pointer_loss": 0.7089252471923828, "step": 6450 }, { "epoch": 0.6577354017157206, "loss_breakdown/lm_loss": 6.541188213304849e-06, "loss_breakdown/pointer_loss": 0.5827956795692444, "step": 6450 }, { "epoch": 0.6587551465245822, "grad_norm": 4.916536167843277, "learning_rate": 1.8963172804532579e-06, "loss": 0.2598, "step": 6460 }, { "epoch": 0.6587551465245822, "loss_breakdown/lm_loss": 5.4418437684944365e-06, "loss_breakdown/pointer_loss": 0.13893595337867737, "step": 6460 }, { "epoch": 0.6587551465245822, "loss_breakdown/lm_loss": 5.608723313343944e-06, "loss_breakdown/pointer_loss": 0.19175544381141663, "step": 6460 }, { "epoch": 0.6587551465245822, "loss_breakdown/lm_loss": 6.206757461768575e-06, "loss_breakdown/pointer_loss": 0.08019017428159714, "step": 6460 }, { "epoch": 0.6587551465245822, "loss_breakdown/lm_loss": 7.56967756387894e-06, "loss_breakdown/pointer_loss": 0.03360161930322647, "step": 6460 }, { "epoch": 0.6587551465245822, "loss_breakdown/lm_loss": 7.774968253215775e-06, "loss_breakdown/pointer_loss": 0.9120523929595947, "step": 6460 }, { "epoch": 0.6587551465245822, "loss_breakdown/lm_loss": 6.306103841779986e-06, "loss_breakdown/pointer_loss": 2.6042587757110596, "step": 6460 }, { "epoch": 0.6587551465245822, "loss_breakdown/lm_loss": 8.34452839626465e-06, "loss_breakdown/pointer_loss": 0.07904776185750961, "step": 6460 }, { "epoch": 0.6587551465245822, "loss_breakdown/lm_loss": 6.278279670368647e-06, "loss_breakdown/pointer_loss": 0.07458715885877609, "step": 6460 }, { "epoch": 0.6597748913334438, "grad_norm": 11.198780665850855, "learning_rate": 1.8906515580736546e-06, "loss": 0.2609, "step": 6470 }, { "epoch": 0.6597748913334438, "loss_breakdown/lm_loss": 5.9142348618479446e-06, "loss_breakdown/pointer_loss": 0.9771754741668701, "step": 6470 }, { "epoch": 0.6597748913334438, "loss_breakdown/lm_loss": 7.4795334512600675e-06, "loss_breakdown/pointer_loss": 1.4357881546020508, "step": 6470 }, { "epoch": 0.6597748913334438, "loss_breakdown/lm_loss": 8.468000487482641e-06, "loss_breakdown/pointer_loss": 0.7293401956558228, "step": 6470 }, { "epoch": 0.6597748913334438, "loss_breakdown/lm_loss": 6.938831120351097e-06, "loss_breakdown/pointer_loss": 0.13473230600357056, "step": 6470 }, { "epoch": 0.6597748913334438, "loss_breakdown/lm_loss": 6.927386948518688e-06, "loss_breakdown/pointer_loss": 1.058692216873169, "step": 6470 }, { "epoch": 0.6597748913334438, "loss_breakdown/lm_loss": 7.355505204031942e-06, "loss_breakdown/pointer_loss": 0.2388092577457428, "step": 6470 }, { "epoch": 0.6597748913334438, "loss_breakdown/lm_loss": 7.20310481483466e-06, "loss_breakdown/pointer_loss": 0.5696424841880798, "step": 6470 }, { "epoch": 0.6597748913334438, "loss_breakdown/lm_loss": 6.863175713078817e-06, "loss_breakdown/pointer_loss": 0.4142664074897766, "step": 6470 }, { "epoch": 0.6607946361423054, "grad_norm": 4.7957336537947555, "learning_rate": 1.884985835694051e-06, "loss": 0.2584, "step": 6480 }, { "epoch": 0.6607946361423054, "loss_breakdown/lm_loss": 1.4229100997908972e-05, "loss_breakdown/pointer_loss": 0.9644767045974731, "step": 6480 }, { "epoch": 0.6607946361423054, "loss_breakdown/lm_loss": 5.543172846955713e-06, "loss_breakdown/pointer_loss": 0.1042618677020073, "step": 6480 }, { "epoch": 0.6607946361423054, "loss_breakdown/lm_loss": 8.384216926060617e-06, "loss_breakdown/pointer_loss": 0.0893700122833252, "step": 6480 }, { "epoch": 0.6607946361423054, "loss_breakdown/lm_loss": 1.8519993318477646e-05, "loss_breakdown/pointer_loss": 0.24317726492881775, "step": 6480 }, { "epoch": 0.6607946361423054, "loss_breakdown/lm_loss": 6.699484401906375e-06, "loss_breakdown/pointer_loss": 0.27977025508880615, "step": 6480 }, { "epoch": 0.6607946361423054, "loss_breakdown/lm_loss": 5.610736479866318e-06, "loss_breakdown/pointer_loss": 0.016395345330238342, "step": 6480 }, { "epoch": 0.6607946361423054, "loss_breakdown/lm_loss": 7.136571639421163e-06, "loss_breakdown/pointer_loss": 0.20498524606227875, "step": 6480 }, { "epoch": 0.6607946361423054, "loss_breakdown/lm_loss": 7.212067885120632e-06, "loss_breakdown/pointer_loss": 0.31885063648223877, "step": 6480 }, { "epoch": 0.661814380951167, "grad_norm": 15.504074959472947, "learning_rate": 1.8793201133144478e-06, "loss": 0.2666, "step": 6490 }, { "epoch": 0.661814380951167, "loss_breakdown/lm_loss": 5.6620842769916635e-06, "loss_breakdown/pointer_loss": 0.5607014298439026, "step": 6490 }, { "epoch": 0.661814380951167, "loss_breakdown/lm_loss": 6.742749519617064e-06, "loss_breakdown/pointer_loss": 0.17340396344661713, "step": 6490 }, { "epoch": 0.661814380951167, "loss_breakdown/lm_loss": 4.906834874418564e-06, "loss_breakdown/pointer_loss": 0.2778262197971344, "step": 6490 }, { "epoch": 0.661814380951167, "loss_breakdown/lm_loss": 6.318769464996876e-06, "loss_breakdown/pointer_loss": 0.2500160336494446, "step": 6490 }, { "epoch": 0.661814380951167, "loss_breakdown/lm_loss": 5.0935013859998435e-06, "loss_breakdown/pointer_loss": 0.22392705082893372, "step": 6490 }, { "epoch": 0.661814380951167, "loss_breakdown/lm_loss": 6.544925327034434e-06, "loss_breakdown/pointer_loss": 0.29402631521224976, "step": 6490 }, { "epoch": 0.661814380951167, "loss_breakdown/lm_loss": 5.057323051005369e-06, "loss_breakdown/pointer_loss": 0.5764464139938354, "step": 6490 }, { "epoch": 0.661814380951167, "loss_breakdown/lm_loss": 5.744495865656063e-06, "loss_breakdown/pointer_loss": 0.24321973323822021, "step": 6490 }, { "epoch": 0.6628341257600285, "grad_norm": 19.258547099557468, "learning_rate": 1.8736543909348443e-06, "loss": 0.2694, "step": 6500 }, { "epoch": 0.6628341257600285, "loss_breakdown/lm_loss": 2.5229426682926714e-05, "loss_breakdown/pointer_loss": 2.5121026039123535, "step": 6500 }, { "epoch": 0.6628341257600285, "loss_breakdown/lm_loss": 2.2868909582030028e-05, "loss_breakdown/pointer_loss": 0.6945494413375854, "step": 6500 }, { "epoch": 0.6628341257600285, "loss_breakdown/lm_loss": 1.1338363037793897e-05, "loss_breakdown/pointer_loss": 0.9837496280670166, "step": 6500 }, { "epoch": 0.6628341257600285, "loss_breakdown/lm_loss": 1.455835717933951e-05, "loss_breakdown/pointer_loss": 0.5618056654930115, "step": 6500 }, { "epoch": 0.6628341257600285, "loss_breakdown/lm_loss": 9.375768058816902e-06, "loss_breakdown/pointer_loss": 1.516955018043518, "step": 6500 }, { "epoch": 0.6628341257600285, "loss_breakdown/lm_loss": 1.3322952327143867e-05, "loss_breakdown/pointer_loss": 0.4325236678123474, "step": 6500 }, { "epoch": 0.6628341257600285, "loss_breakdown/lm_loss": 9.728220902616158e-06, "loss_breakdown/pointer_loss": 0.5136393308639526, "step": 6500 }, { "epoch": 0.6628341257600285, "loss_breakdown/lm_loss": 6.484614004875766e-06, "loss_breakdown/pointer_loss": 0.40634027123451233, "step": 6500 }, { "epoch": 0.6638538705688901, "grad_norm": 11.664630185262874, "learning_rate": 1.8679886685552408e-06, "loss": 0.2657, "step": 6510 }, { "epoch": 0.6638538705688901, "loss_breakdown/lm_loss": 7.841877959435806e-06, "loss_breakdown/pointer_loss": 0.3282458484172821, "step": 6510 }, { "epoch": 0.6638538705688901, "loss_breakdown/lm_loss": 6.577285148523515e-06, "loss_breakdown/pointer_loss": 0.22924813628196716, "step": 6510 }, { "epoch": 0.6638538705688901, "loss_breakdown/lm_loss": 8.483588317176327e-06, "loss_breakdown/pointer_loss": 0.5704649090766907, "step": 6510 }, { "epoch": 0.6638538705688901, "loss_breakdown/lm_loss": 5.336552476364886e-06, "loss_breakdown/pointer_loss": 0.09009912610054016, "step": 6510 }, { "epoch": 0.6638538705688901, "loss_breakdown/lm_loss": 5.7100678532151505e-06, "loss_breakdown/pointer_loss": 0.25137749314308167, "step": 6510 }, { "epoch": 0.6638538705688901, "loss_breakdown/lm_loss": 8.125965905492194e-06, "loss_breakdown/pointer_loss": 0.5088829398155212, "step": 6510 }, { "epoch": 0.6638538705688901, "loss_breakdown/lm_loss": 5.559073088079458e-06, "loss_breakdown/pointer_loss": 0.17020690441131592, "step": 6510 }, { "epoch": 0.6638538705688901, "loss_breakdown/lm_loss": 7.919329618744086e-06, "loss_breakdown/pointer_loss": 2.7603931427001953, "step": 6510 }, { "epoch": 0.6648736153777517, "grad_norm": 4.139067717343606, "learning_rate": 1.8623229461756375e-06, "loss": 0.2829, "step": 6520 }, { "epoch": 0.6648736153777517, "loss_breakdown/lm_loss": 7.759316758892965e-06, "loss_breakdown/pointer_loss": 0.39283645153045654, "step": 6520 }, { "epoch": 0.6648736153777517, "loss_breakdown/lm_loss": 6.740683147654636e-06, "loss_breakdown/pointer_loss": 1.003551959991455, "step": 6520 }, { "epoch": 0.6648736153777517, "loss_breakdown/lm_loss": 5.361850980989402e-06, "loss_breakdown/pointer_loss": 0.18529653549194336, "step": 6520 }, { "epoch": 0.6648736153777517, "loss_breakdown/lm_loss": 7.172771347541129e-06, "loss_breakdown/pointer_loss": 0.2790164053440094, "step": 6520 }, { "epoch": 0.6648736153777517, "loss_breakdown/lm_loss": 1.222109767695656e-05, "loss_breakdown/pointer_loss": 0.7631381750106812, "step": 6520 }, { "epoch": 0.6648736153777517, "loss_breakdown/lm_loss": 5.7071283663390204e-06, "loss_breakdown/pointer_loss": 0.3293072581291199, "step": 6520 }, { "epoch": 0.6648736153777517, "loss_breakdown/lm_loss": 6.514250799227739e-06, "loss_breakdown/pointer_loss": 0.31962141394615173, "step": 6520 }, { "epoch": 0.6648736153777517, "loss_breakdown/lm_loss": 7.81849848863203e-06, "loss_breakdown/pointer_loss": 0.3122439980506897, "step": 6520 }, { "epoch": 0.6658933601866133, "grad_norm": 12.6805466308858, "learning_rate": 1.8566572237960342e-06, "loss": 0.2406, "step": 6530 }, { "epoch": 0.6658933601866133, "loss_breakdown/lm_loss": 2.3044287445372902e-05, "loss_breakdown/pointer_loss": 0.24476978182792664, "step": 6530 }, { "epoch": 0.6658933601866133, "loss_breakdown/lm_loss": 1.3589198715635575e-05, "loss_breakdown/pointer_loss": 2.652721643447876, "step": 6530 }, { "epoch": 0.6658933601866133, "loss_breakdown/lm_loss": 6.019994543748908e-06, "loss_breakdown/pointer_loss": 0.09612751007080078, "step": 6530 }, { "epoch": 0.6658933601866133, "loss_breakdown/lm_loss": 5.878262527403422e-06, "loss_breakdown/pointer_loss": 0.13369843363761902, "step": 6530 }, { "epoch": 0.6658933601866133, "loss_breakdown/lm_loss": 8.28491920401575e-06, "loss_breakdown/pointer_loss": 0.38254666328430176, "step": 6530 }, { "epoch": 0.6658933601866133, "loss_breakdown/lm_loss": 9.949795639840886e-06, "loss_breakdown/pointer_loss": 0.31006157398223877, "step": 6530 }, { "epoch": 0.6658933601866133, "loss_breakdown/lm_loss": 7.911414286354557e-06, "loss_breakdown/pointer_loss": 0.10976850986480713, "step": 6530 }, { "epoch": 0.6658933601866133, "loss_breakdown/lm_loss": 6.476956514234189e-06, "loss_breakdown/pointer_loss": 0.07976581156253815, "step": 6530 }, { "epoch": 0.6669131049954748, "grad_norm": 16.252505124545337, "learning_rate": 1.8509915014164305e-06, "loss": 0.2829, "step": 6540 }, { "epoch": 0.6669131049954748, "loss_breakdown/lm_loss": 7.218684004328679e-06, "loss_breakdown/pointer_loss": 0.0937107503414154, "step": 6540 }, { "epoch": 0.6669131049954748, "loss_breakdown/lm_loss": 4.938930942444131e-06, "loss_breakdown/pointer_loss": 0.35824453830718994, "step": 6540 }, { "epoch": 0.6669131049954748, "loss_breakdown/lm_loss": 6.408094122889452e-06, "loss_breakdown/pointer_loss": 0.35059717297554016, "step": 6540 }, { "epoch": 0.6669131049954748, "loss_breakdown/lm_loss": 8.008766599232331e-06, "loss_breakdown/pointer_loss": 0.3232474625110626, "step": 6540 }, { "epoch": 0.6669131049954748, "loss_breakdown/lm_loss": 5.328160114004277e-06, "loss_breakdown/pointer_loss": 0.2853313088417053, "step": 6540 }, { "epoch": 0.6669131049954748, "loss_breakdown/lm_loss": 5.887115548830479e-06, "loss_breakdown/pointer_loss": 0.19671322405338287, "step": 6540 }, { "epoch": 0.6669131049954748, "loss_breakdown/lm_loss": 6.201083124324214e-06, "loss_breakdown/pointer_loss": 0.3643465042114258, "step": 6540 }, { "epoch": 0.6669131049954748, "loss_breakdown/lm_loss": 6.650473551417235e-06, "loss_breakdown/pointer_loss": 0.22213703393936157, "step": 6540 }, { "epoch": 0.6679328498043364, "grad_norm": 6.771258841812254, "learning_rate": 1.8453257790368273e-06, "loss": 0.2596, "step": 6550 }, { "epoch": 0.6679328498043364, "loss_breakdown/lm_loss": 2.5015875507961027e-05, "loss_breakdown/pointer_loss": 1.3110984563827515, "step": 6550 }, { "epoch": 0.6679328498043364, "loss_breakdown/lm_loss": 1.6233012502198108e-05, "loss_breakdown/pointer_loss": 0.4104396104812622, "step": 6550 }, { "epoch": 0.6679328498043364, "loss_breakdown/lm_loss": 8.719796824152581e-06, "loss_breakdown/pointer_loss": 0.604077935218811, "step": 6550 }, { "epoch": 0.6679328498043364, "loss_breakdown/lm_loss": 5.831114776810864e-06, "loss_breakdown/pointer_loss": 0.3948511481285095, "step": 6550 }, { "epoch": 0.6679328498043364, "loss_breakdown/lm_loss": 8.311280907946639e-06, "loss_breakdown/pointer_loss": 0.7501348853111267, "step": 6550 }, { "epoch": 0.6679328498043364, "loss_breakdown/lm_loss": 7.153902060963446e-06, "loss_breakdown/pointer_loss": 0.233540877699852, "step": 6550 }, { "epoch": 0.6679328498043364, "loss_breakdown/lm_loss": 6.015173130435869e-05, "loss_breakdown/pointer_loss": 0.6038333773612976, "step": 6550 }, { "epoch": 0.6679328498043364, "loss_breakdown/lm_loss": 1.0621357432682998e-05, "loss_breakdown/pointer_loss": 0.4246528744697571, "step": 6550 }, { "epoch": 0.668952594613198, "grad_norm": 7.304329721507647, "learning_rate": 1.839660056657224e-06, "loss": 0.2596, "step": 6560 }, { "epoch": 0.668952594613198, "loss_breakdown/lm_loss": 5.9107346714881714e-06, "loss_breakdown/pointer_loss": 0.13848750293254852, "step": 6560 }, { "epoch": 0.668952594613198, "loss_breakdown/lm_loss": 6.7014725573244505e-06, "loss_breakdown/pointer_loss": 0.3880419433116913, "step": 6560 }, { "epoch": 0.668952594613198, "loss_breakdown/lm_loss": 7.290193480002927e-06, "loss_breakdown/pointer_loss": 0.1753721833229065, "step": 6560 }, { "epoch": 0.668952594613198, "loss_breakdown/lm_loss": 7.499456387449754e-06, "loss_breakdown/pointer_loss": 0.3138130307197571, "step": 6560 }, { "epoch": 0.668952594613198, "loss_breakdown/lm_loss": 8.03458533482626e-06, "loss_breakdown/pointer_loss": 0.13969366252422333, "step": 6560 }, { "epoch": 0.668952594613198, "loss_breakdown/lm_loss": 8.050479664234444e-06, "loss_breakdown/pointer_loss": 0.6131569743156433, "step": 6560 }, { "epoch": 0.668952594613198, "loss_breakdown/lm_loss": 8.813318345346488e-06, "loss_breakdown/pointer_loss": 0.4818775951862335, "step": 6560 }, { "epoch": 0.668952594613198, "loss_breakdown/lm_loss": 5.654444521496771e-06, "loss_breakdown/pointer_loss": 0.046748898923397064, "step": 6560 }, { "epoch": 0.6699723394220596, "grad_norm": 7.450744622127362, "learning_rate": 1.8339943342776207e-06, "loss": 0.2759, "step": 6570 }, { "epoch": 0.6699723394220596, "loss_breakdown/lm_loss": 6.092392595746787e-06, "loss_breakdown/pointer_loss": 0.42050057649612427, "step": 6570 }, { "epoch": 0.6699723394220596, "loss_breakdown/lm_loss": 6.203052180353552e-06, "loss_breakdown/pointer_loss": 0.593084990978241, "step": 6570 }, { "epoch": 0.6699723394220596, "loss_breakdown/lm_loss": 7.042348443064839e-06, "loss_breakdown/pointer_loss": 0.18699967861175537, "step": 6570 }, { "epoch": 0.6699723394220596, "loss_breakdown/lm_loss": 7.673730578972027e-06, "loss_breakdown/pointer_loss": 2.458493947982788, "step": 6570 }, { "epoch": 0.6699723394220596, "loss_breakdown/lm_loss": 7.427685432048747e-06, "loss_breakdown/pointer_loss": 0.406501442193985, "step": 6570 }, { "epoch": 0.6699723394220596, "loss_breakdown/lm_loss": 8.440518286079168e-06, "loss_breakdown/pointer_loss": 0.35266175866127014, "step": 6570 }, { "epoch": 0.6699723394220596, "loss_breakdown/lm_loss": 7.260633083205903e-06, "loss_breakdown/pointer_loss": 0.9545515179634094, "step": 6570 }, { "epoch": 0.6699723394220596, "loss_breakdown/lm_loss": 3.012622801179532e-05, "loss_breakdown/pointer_loss": 0.4381040632724762, "step": 6570 }, { "epoch": 0.6709920842309212, "grad_norm": 4.392527129643456, "learning_rate": 1.828328611898017e-06, "loss": 0.2556, "step": 6580 }, { "epoch": 0.6709920842309212, "loss_breakdown/lm_loss": 1.2488477295846678e-05, "loss_breakdown/pointer_loss": 0.2651911675930023, "step": 6580 }, { "epoch": 0.6709920842309212, "loss_breakdown/lm_loss": 1.0875583029701374e-05, "loss_breakdown/pointer_loss": 1.1301546096801758, "step": 6580 }, { "epoch": 0.6709920842309212, "loss_breakdown/lm_loss": 6.242516064958181e-06, "loss_breakdown/pointer_loss": 0.17823633551597595, "step": 6580 }, { "epoch": 0.6709920842309212, "loss_breakdown/lm_loss": 7.371000720013399e-06, "loss_breakdown/pointer_loss": 0.18520689010620117, "step": 6580 }, { "epoch": 0.6709920842309212, "loss_breakdown/lm_loss": 6.449139618780464e-06, "loss_breakdown/pointer_loss": 0.2775841951370239, "step": 6580 }, { "epoch": 0.6709920842309212, "loss_breakdown/lm_loss": 1.542111021990422e-05, "loss_breakdown/pointer_loss": 0.46279776096343994, "step": 6580 }, { "epoch": 0.6709920842309212, "loss_breakdown/lm_loss": 6.508745627797907e-06, "loss_breakdown/pointer_loss": 0.38102516531944275, "step": 6580 }, { "epoch": 0.6709920842309212, "loss_breakdown/lm_loss": 8.01869737188099e-06, "loss_breakdown/pointer_loss": 5.205682754516602, "step": 6580 }, { "epoch": 0.6720118290397828, "grad_norm": 5.11546139838119, "learning_rate": 1.8226628895184137e-06, "loss": 0.273, "step": 6590 }, { "epoch": 0.6720118290397828, "loss_breakdown/lm_loss": 6.872493941045832e-06, "loss_breakdown/pointer_loss": 0.07776245474815369, "step": 6590 }, { "epoch": 0.6720118290397828, "loss_breakdown/lm_loss": 6.52024846203858e-06, "loss_breakdown/pointer_loss": 1.3622101545333862, "step": 6590 }, { "epoch": 0.6720118290397828, "loss_breakdown/lm_loss": 6.342726464936277e-06, "loss_breakdown/pointer_loss": 0.45422065258026123, "step": 6590 }, { "epoch": 0.6720118290397828, "loss_breakdown/lm_loss": 5.8633277149056084e-06, "loss_breakdown/pointer_loss": 0.1845318078994751, "step": 6590 }, { "epoch": 0.6720118290397828, "loss_breakdown/lm_loss": 4.881863333139336e-06, "loss_breakdown/pointer_loss": 0.1610274761915207, "step": 6590 }, { "epoch": 0.6720118290397828, "loss_breakdown/lm_loss": 6.802780262660235e-06, "loss_breakdown/pointer_loss": 0.3742794096469879, "step": 6590 }, { "epoch": 0.6720118290397828, "loss_breakdown/lm_loss": 5.402112492447486e-06, "loss_breakdown/pointer_loss": 0.12574762105941772, "step": 6590 }, { "epoch": 0.6720118290397828, "loss_breakdown/lm_loss": 5.15774627274368e-06, "loss_breakdown/pointer_loss": 0.10248251259326935, "step": 6590 }, { "epoch": 0.6730315738486444, "grad_norm": 12.881648999178646, "learning_rate": 1.8169971671388104e-06, "loss": 0.252, "step": 6600 }, { "epoch": 0.6730315738486444, "loss_breakdown/lm_loss": 1.8391847333987243e-05, "loss_breakdown/pointer_loss": 1.640674114227295, "step": 6600 }, { "epoch": 0.6730315738486444, "loss_breakdown/lm_loss": 1.1091107808169909e-05, "loss_breakdown/pointer_loss": 1.0417864322662354, "step": 6600 }, { "epoch": 0.6730315738486444, "loss_breakdown/lm_loss": 9.641104043112136e-06, "loss_breakdown/pointer_loss": 1.1522603034973145, "step": 6600 }, { "epoch": 0.6730315738486444, "loss_breakdown/lm_loss": 9.559016689308919e-06, "loss_breakdown/pointer_loss": 0.9021229147911072, "step": 6600 }, { "epoch": 0.6730315738486444, "loss_breakdown/lm_loss": 7.3501378210494295e-06, "loss_breakdown/pointer_loss": 0.39530402421951294, "step": 6600 }, { "epoch": 0.6730315738486444, "loss_breakdown/lm_loss": 6.978077180974651e-06, "loss_breakdown/pointer_loss": 1.3335423469543457, "step": 6600 }, { "epoch": 0.6730315738486444, "loss_breakdown/lm_loss": 7.108610589057207e-06, "loss_breakdown/pointer_loss": 0.3243773579597473, "step": 6600 }, { "epoch": 0.6730315738486444, "loss_breakdown/lm_loss": 7.122586794139352e-06, "loss_breakdown/pointer_loss": 0.5923680663108826, "step": 6600 }, { "epoch": 0.674051318657506, "grad_norm": 4.970241399377631, "learning_rate": 1.8113314447592071e-06, "loss": 0.2755, "step": 6610 }, { "epoch": 0.674051318657506, "loss_breakdown/lm_loss": 7.576837560918648e-06, "loss_breakdown/pointer_loss": 1.3678547143936157, "step": 6610 }, { "epoch": 0.674051318657506, "loss_breakdown/lm_loss": 5.774968485638965e-06, "loss_breakdown/pointer_loss": 0.455522745847702, "step": 6610 }, { "epoch": 0.674051318657506, "loss_breakdown/lm_loss": 4.929921487928368e-06, "loss_breakdown/pointer_loss": 0.22010105848312378, "step": 6610 }, { "epoch": 0.674051318657506, "loss_breakdown/lm_loss": 6.068997663533082e-06, "loss_breakdown/pointer_loss": 0.17369511723518372, "step": 6610 }, { "epoch": 0.674051318657506, "loss_breakdown/lm_loss": 6.386872883012984e-06, "loss_breakdown/pointer_loss": 0.3056442141532898, "step": 6610 }, { "epoch": 0.674051318657506, "loss_breakdown/lm_loss": 7.95115192886442e-06, "loss_breakdown/pointer_loss": 0.5216397047042847, "step": 6610 }, { "epoch": 0.674051318657506, "loss_breakdown/lm_loss": 7.4289823714934755e-06, "loss_breakdown/pointer_loss": 0.16195930540561676, "step": 6610 }, { "epoch": 0.674051318657506, "loss_breakdown/lm_loss": 1.0057018698717002e-05, "loss_breakdown/pointer_loss": 4.529489040374756, "step": 6610 }, { "epoch": 0.6750710634663676, "grad_norm": 28.388420856863757, "learning_rate": 1.8056657223796034e-06, "loss": 0.2679, "step": 6620 }, { "epoch": 0.6750710634663676, "loss_breakdown/lm_loss": 7.1958647822611965e-06, "loss_breakdown/pointer_loss": 0.5295179486274719, "step": 6620 }, { "epoch": 0.6750710634663676, "loss_breakdown/lm_loss": 6.7425712586555164e-06, "loss_breakdown/pointer_loss": 0.5178757905960083, "step": 6620 }, { "epoch": 0.6750710634663676, "loss_breakdown/lm_loss": 7.537463261542143e-06, "loss_breakdown/pointer_loss": 0.6012694835662842, "step": 6620 }, { "epoch": 0.6750710634663676, "loss_breakdown/lm_loss": 7.99895769887371e-06, "loss_breakdown/pointer_loss": 0.47733309864997864, "step": 6620 }, { "epoch": 0.6750710634663676, "loss_breakdown/lm_loss": 8.302637070300989e-06, "loss_breakdown/pointer_loss": 0.2618638277053833, "step": 6620 }, { "epoch": 0.6750710634663676, "loss_breakdown/lm_loss": 8.166784937202465e-06, "loss_breakdown/pointer_loss": 0.4091411828994751, "step": 6620 }, { "epoch": 0.6750710634663676, "loss_breakdown/lm_loss": 6.258651865209686e-06, "loss_breakdown/pointer_loss": 1.0073565244674683, "step": 6620 }, { "epoch": 0.6750710634663676, "loss_breakdown/lm_loss": 5.772317763330648e-06, "loss_breakdown/pointer_loss": 0.22137928009033203, "step": 6620 }, { "epoch": 0.6760908082752292, "grad_norm": 11.178168331913128, "learning_rate": 1.8000000000000001e-06, "loss": 0.2388, "step": 6630 }, { "epoch": 0.6760908082752292, "loss_breakdown/lm_loss": 6.3617294472351205e-06, "loss_breakdown/pointer_loss": 0.6803426742553711, "step": 6630 }, { "epoch": 0.6760908082752292, "loss_breakdown/lm_loss": 5.674283784173895e-06, "loss_breakdown/pointer_loss": 0.08161097019910812, "step": 6630 }, { "epoch": 0.6760908082752292, "loss_breakdown/lm_loss": 5.396153937908821e-06, "loss_breakdown/pointer_loss": 0.09663224965333939, "step": 6630 }, { "epoch": 0.6760908082752292, "loss_breakdown/lm_loss": 1.1284881111350842e-05, "loss_breakdown/pointer_loss": 0.15417250990867615, "step": 6630 }, { "epoch": 0.6760908082752292, "loss_breakdown/lm_loss": 7.327281764446525e-06, "loss_breakdown/pointer_loss": 0.12899252772331238, "step": 6630 }, { "epoch": 0.6760908082752292, "loss_breakdown/lm_loss": 6.4014620875241235e-06, "loss_breakdown/pointer_loss": 0.05854006111621857, "step": 6630 }, { "epoch": 0.6760908082752292, "loss_breakdown/lm_loss": 5.690203579433728e-06, "loss_breakdown/pointer_loss": 0.29725655913352966, "step": 6630 }, { "epoch": 0.6760908082752292, "loss_breakdown/lm_loss": 8.71803149493644e-06, "loss_breakdown/pointer_loss": 0.4281195402145386, "step": 6630 }, { "epoch": 0.6771105530840907, "grad_norm": 4.108629825876618, "learning_rate": 1.7943342776203968e-06, "loss": 0.2922, "step": 6640 }, { "epoch": 0.6771105530840907, "loss_breakdown/lm_loss": 6.261854650801979e-06, "loss_breakdown/pointer_loss": 0.1463371217250824, "step": 6640 }, { "epoch": 0.6771105530840907, "loss_breakdown/lm_loss": 6.0782808759540785e-06, "loss_breakdown/pointer_loss": 0.599882960319519, "step": 6640 }, { "epoch": 0.6771105530840907, "loss_breakdown/lm_loss": 4.606496986525599e-06, "loss_breakdown/pointer_loss": 0.2684188485145569, "step": 6640 }, { "epoch": 0.6771105530840907, "loss_breakdown/lm_loss": 5.030585271015298e-06, "loss_breakdown/pointer_loss": 0.23025546967983246, "step": 6640 }, { "epoch": 0.6771105530840907, "loss_breakdown/lm_loss": 5.290189619699959e-06, "loss_breakdown/pointer_loss": 0.21497157216072083, "step": 6640 }, { "epoch": 0.6771105530840907, "loss_breakdown/lm_loss": 5.073504780739313e-06, "loss_breakdown/pointer_loss": 0.28207096457481384, "step": 6640 }, { "epoch": 0.6771105530840907, "loss_breakdown/lm_loss": 4.372109742689645e-06, "loss_breakdown/pointer_loss": 0.10639658570289612, "step": 6640 }, { "epoch": 0.6771105530840907, "loss_breakdown/lm_loss": 0.00014769908739253879, "loss_breakdown/pointer_loss": 0.18541550636291504, "step": 6640 }, { "epoch": 0.6781302978929523, "grad_norm": 5.217312259253999, "learning_rate": 1.7886685552407933e-06, "loss": 0.2732, "step": 6650 }, { "epoch": 0.6781302978929523, "loss_breakdown/lm_loss": 2.7587839213083498e-05, "loss_breakdown/pointer_loss": 1.9337536096572876, "step": 6650 }, { "epoch": 0.6781302978929523, "loss_breakdown/lm_loss": 8.909400094125886e-06, "loss_breakdown/pointer_loss": 1.0033166408538818, "step": 6650 }, { "epoch": 0.6781302978929523, "loss_breakdown/lm_loss": 1.4522828678309452e-05, "loss_breakdown/pointer_loss": 0.6222327351570129, "step": 6650 }, { "epoch": 0.6781302978929523, "loss_breakdown/lm_loss": 2.810265687003266e-05, "loss_breakdown/pointer_loss": 1.1580536365509033, "step": 6650 }, { "epoch": 0.6781302978929523, "loss_breakdown/lm_loss": 7.704165909672156e-06, "loss_breakdown/pointer_loss": 0.7519882917404175, "step": 6650 }, { "epoch": 0.6781302978929523, "loss_breakdown/lm_loss": 7.857548553147353e-06, "loss_breakdown/pointer_loss": 0.20176726579666138, "step": 6650 }, { "epoch": 0.6781302978929523, "loss_breakdown/lm_loss": 7.075774647091748e-06, "loss_breakdown/pointer_loss": 0.3857646584510803, "step": 6650 }, { "epoch": 0.6781302978929523, "loss_breakdown/lm_loss": 8.317230822285637e-06, "loss_breakdown/pointer_loss": 0.9714211821556091, "step": 6650 }, { "epoch": 0.6791500427018139, "grad_norm": 7.270629085251226, "learning_rate": 1.78300283286119e-06, "loss": 0.2652, "step": 6660 }, { "epoch": 0.6791500427018139, "loss_breakdown/lm_loss": 1.4134457160253078e-05, "loss_breakdown/pointer_loss": 0.7928239107131958, "step": 6660 }, { "epoch": 0.6791500427018139, "loss_breakdown/lm_loss": 6.678601039311616e-06, "loss_breakdown/pointer_loss": 0.0975019559264183, "step": 6660 }, { "epoch": 0.6791500427018139, "loss_breakdown/lm_loss": 5.327814051270252e-06, "loss_breakdown/pointer_loss": 0.31753695011138916, "step": 6660 }, { "epoch": 0.6791500427018139, "loss_breakdown/lm_loss": 5.217343641561456e-06, "loss_breakdown/pointer_loss": 0.3362278938293457, "step": 6660 }, { "epoch": 0.6791500427018139, "loss_breakdown/lm_loss": 5.9193375818722416e-06, "loss_breakdown/pointer_loss": 0.15661710500717163, "step": 6660 }, { "epoch": 0.6791500427018139, "loss_breakdown/lm_loss": 9.703342584543861e-06, "loss_breakdown/pointer_loss": 1.859138011932373, "step": 6660 }, { "epoch": 0.6791500427018139, "loss_breakdown/lm_loss": 6.910081992828054e-06, "loss_breakdown/pointer_loss": 0.19763515889644623, "step": 6660 }, { "epoch": 0.6791500427018139, "loss_breakdown/lm_loss": 5.4517890930583235e-06, "loss_breakdown/pointer_loss": 0.38558244705200195, "step": 6660 }, { "epoch": 0.6801697875106755, "grad_norm": 2.6694253840452484, "learning_rate": 1.7773371104815866e-06, "loss": 0.2805, "step": 6670 }, { "epoch": 0.6801697875106755, "loss_breakdown/lm_loss": 6.274307907006005e-06, "loss_breakdown/pointer_loss": 0.6610399484634399, "step": 6670 }, { "epoch": 0.6801697875106755, "loss_breakdown/lm_loss": 5.711872745450819e-06, "loss_breakdown/pointer_loss": 0.4266826808452606, "step": 6670 }, { "epoch": 0.6801697875106755, "loss_breakdown/lm_loss": 6.20062974121538e-06, "loss_breakdown/pointer_loss": 0.15599223971366882, "step": 6670 }, { "epoch": 0.6801697875106755, "loss_breakdown/lm_loss": 6.813227628299501e-06, "loss_breakdown/pointer_loss": 0.1783476620912552, "step": 6670 }, { "epoch": 0.6801697875106755, "loss_breakdown/lm_loss": 6.470595508289989e-06, "loss_breakdown/pointer_loss": 0.5082911252975464, "step": 6670 }, { "epoch": 0.6801697875106755, "loss_breakdown/lm_loss": 6.444283371820347e-06, "loss_breakdown/pointer_loss": 0.149691641330719, "step": 6670 }, { "epoch": 0.6801697875106755, "loss_breakdown/lm_loss": 5.5783771131245885e-06, "loss_breakdown/pointer_loss": 0.3439443111419678, "step": 6670 }, { "epoch": 0.6801697875106755, "loss_breakdown/lm_loss": 5.863991646037903e-06, "loss_breakdown/pointer_loss": 0.16146159172058105, "step": 6670 }, { "epoch": 0.681189532319537, "grad_norm": 9.285596183275441, "learning_rate": 1.771671388101983e-06, "loss": 0.2458, "step": 6680 }, { "epoch": 0.681189532319537, "loss_breakdown/lm_loss": 7.517994617955992e-06, "loss_breakdown/pointer_loss": 0.8245308995246887, "step": 6680 }, { "epoch": 0.681189532319537, "loss_breakdown/lm_loss": 7.259746325871674e-06, "loss_breakdown/pointer_loss": 0.1567273736000061, "step": 6680 }, { "epoch": 0.681189532319537, "loss_breakdown/lm_loss": 6.894164926052326e-06, "loss_breakdown/pointer_loss": 1.0176382064819336, "step": 6680 }, { "epoch": 0.681189532319537, "loss_breakdown/lm_loss": 4.804094714927487e-06, "loss_breakdown/pointer_loss": 0.2881960868835449, "step": 6680 }, { "epoch": 0.681189532319537, "loss_breakdown/lm_loss": 7.009440196270589e-06, "loss_breakdown/pointer_loss": 3.130441427230835, "step": 6680 }, { "epoch": 0.681189532319537, "loss_breakdown/lm_loss": 7.490197276638355e-06, "loss_breakdown/pointer_loss": 2.7300403118133545, "step": 6680 }, { "epoch": 0.681189532319537, "loss_breakdown/lm_loss": 6.862389454909135e-06, "loss_breakdown/pointer_loss": 0.24323347210884094, "step": 6680 }, { "epoch": 0.681189532319537, "loss_breakdown/lm_loss": 5.714025974157266e-06, "loss_breakdown/pointer_loss": 0.06770110130310059, "step": 6680 }, { "epoch": 0.6822092771283986, "grad_norm": 3.245764808256035, "learning_rate": 1.7660056657223798e-06, "loss": 0.287, "step": 6690 }, { "epoch": 0.6822092771283986, "loss_breakdown/lm_loss": 4.847082436754135e-06, "loss_breakdown/pointer_loss": 0.3407592177391052, "step": 6690 }, { "epoch": 0.6822092771283986, "loss_breakdown/lm_loss": 5.77080027142074e-06, "loss_breakdown/pointer_loss": 0.3766479194164276, "step": 6690 }, { "epoch": 0.6822092771283986, "loss_breakdown/lm_loss": 5.68486939300783e-06, "loss_breakdown/pointer_loss": 0.7470729351043701, "step": 6690 }, { "epoch": 0.6822092771283986, "loss_breakdown/lm_loss": 6.012851827108534e-06, "loss_breakdown/pointer_loss": 0.4737793803215027, "step": 6690 }, { "epoch": 0.6822092771283986, "loss_breakdown/lm_loss": 6.646901965723373e-06, "loss_breakdown/pointer_loss": 0.33985331654548645, "step": 6690 }, { "epoch": 0.6822092771283986, "loss_breakdown/lm_loss": 6.286224106588634e-06, "loss_breakdown/pointer_loss": 0.8621746301651001, "step": 6690 }, { "epoch": 0.6822092771283986, "loss_breakdown/lm_loss": 4.54808332506218e-06, "loss_breakdown/pointer_loss": 0.15840867161750793, "step": 6690 }, { "epoch": 0.6822092771283986, "loss_breakdown/lm_loss": 5.83422661293298e-06, "loss_breakdown/pointer_loss": 0.22168290615081787, "step": 6690 }, { "epoch": 0.6832290219372602, "grad_norm": 8.517586412794719, "learning_rate": 1.7603399433427765e-06, "loss": 0.2853, "step": 6700 }, { "epoch": 0.6832290219372602, "loss_breakdown/lm_loss": 2.3022184905130416e-05, "loss_breakdown/pointer_loss": 1.387129306793213, "step": 6700 }, { "epoch": 0.6832290219372602, "loss_breakdown/lm_loss": 1.139896903623594e-05, "loss_breakdown/pointer_loss": 1.0232374668121338, "step": 6700 }, { "epoch": 0.6832290219372602, "loss_breakdown/lm_loss": 8.526555575372186e-06, "loss_breakdown/pointer_loss": 1.2026433944702148, "step": 6700 }, { "epoch": 0.6832290219372602, "loss_breakdown/lm_loss": 8.194839210773353e-06, "loss_breakdown/pointer_loss": 0.44569212198257446, "step": 6700 }, { "epoch": 0.6832290219372602, "loss_breakdown/lm_loss": 1.0052192010334693e-05, "loss_breakdown/pointer_loss": 0.33116260170936584, "step": 6700 }, { "epoch": 0.6832290219372602, "loss_breakdown/lm_loss": 8.385127330257092e-06, "loss_breakdown/pointer_loss": 0.62480628490448, "step": 6700 }, { "epoch": 0.6832290219372602, "loss_breakdown/lm_loss": 6.252678304008441e-06, "loss_breakdown/pointer_loss": 0.6974650621414185, "step": 6700 }, { "epoch": 0.6832290219372602, "loss_breakdown/lm_loss": 6.502986707346281e-06, "loss_breakdown/pointer_loss": 0.47478049993515015, "step": 6700 }, { "epoch": 0.6842487667461218, "grad_norm": 2.9271249772947265, "learning_rate": 1.7546742209631728e-06, "loss": 0.2598, "step": 6710 }, { "epoch": 0.6842487667461218, "loss_breakdown/lm_loss": 5.8682057897385675e-06, "loss_breakdown/pointer_loss": 0.4763350486755371, "step": 6710 }, { "epoch": 0.6842487667461218, "loss_breakdown/lm_loss": 6.756068160029827e-06, "loss_breakdown/pointer_loss": 0.3961330056190491, "step": 6710 }, { "epoch": 0.6842487667461218, "loss_breakdown/lm_loss": 5.3908561312709935e-06, "loss_breakdown/pointer_loss": 0.23467037081718445, "step": 6710 }, { "epoch": 0.6842487667461218, "loss_breakdown/lm_loss": 5.451787728816271e-06, "loss_breakdown/pointer_loss": 0.08714419603347778, "step": 6710 }, { "epoch": 0.6842487667461218, "loss_breakdown/lm_loss": 6.1829200603824575e-06, "loss_breakdown/pointer_loss": 0.2557516098022461, "step": 6710 }, { "epoch": 0.6842487667461218, "loss_breakdown/lm_loss": 4.2557371671136934e-06, "loss_breakdown/pointer_loss": 0.2983129918575287, "step": 6710 }, { "epoch": 0.6842487667461218, "loss_breakdown/lm_loss": 6.294181275734445e-06, "loss_breakdown/pointer_loss": 0.24241912364959717, "step": 6710 }, { "epoch": 0.6842487667461218, "loss_breakdown/lm_loss": 7.78823141445173e-06, "loss_breakdown/pointer_loss": 0.32429200410842896, "step": 6710 }, { "epoch": 0.6852685115549834, "grad_norm": 4.509152779286802, "learning_rate": 1.7490084985835695e-06, "loss": 0.2884, "step": 6720 }, { "epoch": 0.6852685115549834, "loss_breakdown/lm_loss": 6.352250238705892e-06, "loss_breakdown/pointer_loss": 0.10626202821731567, "step": 6720 }, { "epoch": 0.6852685115549834, "loss_breakdown/lm_loss": 9.881833648250904e-06, "loss_breakdown/pointer_loss": 0.17666606605052948, "step": 6720 }, { "epoch": 0.6852685115549834, "loss_breakdown/lm_loss": 8.848714969644789e-06, "loss_breakdown/pointer_loss": 0.8689549565315247, "step": 6720 }, { "epoch": 0.6852685115549834, "loss_breakdown/lm_loss": 4.292164248909103e-06, "loss_breakdown/pointer_loss": 0.1264077126979828, "step": 6720 }, { "epoch": 0.6852685115549834, "loss_breakdown/lm_loss": 6.898025731061352e-06, "loss_breakdown/pointer_loss": 0.26960015296936035, "step": 6720 }, { "epoch": 0.6852685115549834, "loss_breakdown/lm_loss": 8.952500138548203e-06, "loss_breakdown/pointer_loss": 0.43404847383499146, "step": 6720 }, { "epoch": 0.6852685115549834, "loss_breakdown/lm_loss": 5.135594165039947e-06, "loss_breakdown/pointer_loss": 0.3527442216873169, "step": 6720 }, { "epoch": 0.6852685115549834, "loss_breakdown/lm_loss": 7.153385467972839e-06, "loss_breakdown/pointer_loss": 0.03946312144398689, "step": 6720 }, { "epoch": 0.6862882563638449, "grad_norm": 5.533916325181651, "learning_rate": 1.7433427762039662e-06, "loss": 0.2479, "step": 6730 }, { "epoch": 0.6862882563638449, "loss_breakdown/lm_loss": 6.532590305141639e-06, "loss_breakdown/pointer_loss": 1.8725152015686035, "step": 6730 }, { "epoch": 0.6862882563638449, "loss_breakdown/lm_loss": 6.723304068145808e-06, "loss_breakdown/pointer_loss": 0.24272502958774567, "step": 6730 }, { "epoch": 0.6862882563638449, "loss_breakdown/lm_loss": 7.204105713753961e-06, "loss_breakdown/pointer_loss": 0.12309932708740234, "step": 6730 }, { "epoch": 0.6862882563638449, "loss_breakdown/lm_loss": 1.012051052384777e-05, "loss_breakdown/pointer_loss": 0.02162053808569908, "step": 6730 }, { "epoch": 0.6862882563638449, "loss_breakdown/lm_loss": 9.258414138457738e-06, "loss_breakdown/pointer_loss": 0.3259102702140808, "step": 6730 }, { "epoch": 0.6862882563638449, "loss_breakdown/lm_loss": 1.527749191154726e-05, "loss_breakdown/pointer_loss": 0.26601022481918335, "step": 6730 }, { "epoch": 0.6862882563638449, "loss_breakdown/lm_loss": 1.2369535397738218e-05, "loss_breakdown/pointer_loss": 0.4487559199333191, "step": 6730 }, { "epoch": 0.6862882563638449, "loss_breakdown/lm_loss": 1.0378823390055913e-05, "loss_breakdown/pointer_loss": 0.10101032257080078, "step": 6730 }, { "epoch": 0.6873080011727065, "grad_norm": 4.048657111551293, "learning_rate": 1.737677053824363e-06, "loss": 0.2696, "step": 6740 }, { "epoch": 0.6873080011727065, "loss_breakdown/lm_loss": 5.46633555131848e-06, "loss_breakdown/pointer_loss": 0.27230969071388245, "step": 6740 }, { "epoch": 0.6873080011727065, "loss_breakdown/lm_loss": 5.443817371997284e-06, "loss_breakdown/pointer_loss": 0.38108664751052856, "step": 6740 }, { "epoch": 0.6873080011727065, "loss_breakdown/lm_loss": 6.212725111254258e-06, "loss_breakdown/pointer_loss": 0.3735330402851105, "step": 6740 }, { "epoch": 0.6873080011727065, "loss_breakdown/lm_loss": 5.583642177953152e-06, "loss_breakdown/pointer_loss": 0.3272547125816345, "step": 6740 }, { "epoch": 0.6873080011727065, "loss_breakdown/lm_loss": 4.434548827703111e-06, "loss_breakdown/pointer_loss": 0.20840102434158325, "step": 6740 }, { "epoch": 0.6873080011727065, "loss_breakdown/lm_loss": 1.1531396921782289e-05, "loss_breakdown/pointer_loss": 0.9576440453529358, "step": 6740 }, { "epoch": 0.6873080011727065, "loss_breakdown/lm_loss": 4.541836005955702e-06, "loss_breakdown/pointer_loss": 0.15784569084644318, "step": 6740 }, { "epoch": 0.6873080011727065, "loss_breakdown/lm_loss": 5.953769232291961e-06, "loss_breakdown/pointer_loss": 0.3637621998786926, "step": 6740 }, { "epoch": 0.6883277459815681, "grad_norm": 6.63994537766466, "learning_rate": 1.7320113314447592e-06, "loss": 0.2855, "step": 6750 }, { "epoch": 0.6883277459815681, "loss_breakdown/lm_loss": 1.8000851923716255e-05, "loss_breakdown/pointer_loss": 1.0205366611480713, "step": 6750 }, { "epoch": 0.6883277459815681, "loss_breakdown/lm_loss": 1.604159660928417e-05, "loss_breakdown/pointer_loss": 0.5904262065887451, "step": 6750 }, { "epoch": 0.6883277459815681, "loss_breakdown/lm_loss": 6.752313765900908e-06, "loss_breakdown/pointer_loss": 0.9233285188674927, "step": 6750 }, { "epoch": 0.6883277459815681, "loss_breakdown/lm_loss": 8.514192813890986e-06, "loss_breakdown/pointer_loss": 0.42933332920074463, "step": 6750 }, { "epoch": 0.6883277459815681, "loss_breakdown/lm_loss": 7.06345372236683e-06, "loss_breakdown/pointer_loss": 0.48295146226882935, "step": 6750 }, { "epoch": 0.6883277459815681, "loss_breakdown/lm_loss": 8.507509846822359e-06, "loss_breakdown/pointer_loss": 1.4961504936218262, "step": 6750 }, { "epoch": 0.6883277459815681, "loss_breakdown/lm_loss": 6.491999101854162e-06, "loss_breakdown/pointer_loss": 0.5724210143089294, "step": 6750 }, { "epoch": 0.6883277459815681, "loss_breakdown/lm_loss": 7.050708973110886e-06, "loss_breakdown/pointer_loss": 0.9717751741409302, "step": 6750 }, { "epoch": 0.6893474907904297, "grad_norm": 2.3349776852731434, "learning_rate": 1.726345609065156e-06, "loss": 0.2638, "step": 6760 }, { "epoch": 0.6893474907904297, "loss_breakdown/lm_loss": 5.495490313478513e-06, "loss_breakdown/pointer_loss": 0.2675490975379944, "step": 6760 }, { "epoch": 0.6893474907904297, "loss_breakdown/lm_loss": 1.956286359927617e-05, "loss_breakdown/pointer_loss": 2.039757251739502, "step": 6760 }, { "epoch": 0.6893474907904297, "loss_breakdown/lm_loss": 1.9215083739254624e-05, "loss_breakdown/pointer_loss": 1.229034185409546, "step": 6760 }, { "epoch": 0.6893474907904297, "loss_breakdown/lm_loss": 7.764352631056681e-06, "loss_breakdown/pointer_loss": 0.10128170996904373, "step": 6760 }, { "epoch": 0.6893474907904297, "loss_breakdown/lm_loss": 6.184495759953279e-06, "loss_breakdown/pointer_loss": 0.10764608532190323, "step": 6760 }, { "epoch": 0.6893474907904297, "loss_breakdown/lm_loss": 4.577597792376764e-06, "loss_breakdown/pointer_loss": 0.0949619859457016, "step": 6760 }, { "epoch": 0.6893474907904297, "loss_breakdown/lm_loss": 5.695498202840099e-06, "loss_breakdown/pointer_loss": 1.1848855018615723, "step": 6760 }, { "epoch": 0.6893474907904297, "loss_breakdown/lm_loss": 1.1582761544559617e-05, "loss_breakdown/pointer_loss": 0.20345953106880188, "step": 6760 }, { "epoch": 0.6903672355992913, "grad_norm": 4.217997900190588, "learning_rate": 1.7206798866855527e-06, "loss": 0.2769, "step": 6770 }, { "epoch": 0.6903672355992913, "loss_breakdown/lm_loss": 7.110846127034165e-06, "loss_breakdown/pointer_loss": 0.304135262966156, "step": 6770 }, { "epoch": 0.6903672355992913, "loss_breakdown/lm_loss": 4.0764087316347286e-05, "loss_breakdown/pointer_loss": 0.9485127925872803, "step": 6770 }, { "epoch": 0.6903672355992913, "loss_breakdown/lm_loss": 6.656377991021145e-06, "loss_breakdown/pointer_loss": 0.9032496213912964, "step": 6770 }, { "epoch": 0.6903672355992913, "loss_breakdown/lm_loss": 5.189522653381573e-06, "loss_breakdown/pointer_loss": 0.47860032320022583, "step": 6770 }, { "epoch": 0.6903672355992913, "loss_breakdown/lm_loss": 8.918064850149676e-06, "loss_breakdown/pointer_loss": 0.2550959587097168, "step": 6770 }, { "epoch": 0.6903672355992913, "loss_breakdown/lm_loss": 7.869953151384834e-06, "loss_breakdown/pointer_loss": 1.6906954050064087, "step": 6770 }, { "epoch": 0.6903672355992913, "loss_breakdown/lm_loss": 6.486021902674111e-06, "loss_breakdown/pointer_loss": 0.5441898107528687, "step": 6770 }, { "epoch": 0.6903672355992913, "loss_breakdown/lm_loss": 6.3900670284056105e-06, "loss_breakdown/pointer_loss": 0.21176961064338684, "step": 6770 }, { "epoch": 0.6913869804081528, "grad_norm": 9.692851104029552, "learning_rate": 1.7150141643059492e-06, "loss": 0.2435, "step": 6780 }, { "epoch": 0.6913869804081528, "loss_breakdown/lm_loss": 8.45180511532817e-06, "loss_breakdown/pointer_loss": 1.166888952255249, "step": 6780 }, { "epoch": 0.6913869804081528, "loss_breakdown/lm_loss": 1.0283434676239267e-05, "loss_breakdown/pointer_loss": 1.735945224761963, "step": 6780 }, { "epoch": 0.6913869804081528, "loss_breakdown/lm_loss": 4.5934934860270005e-06, "loss_breakdown/pointer_loss": 0.09741492569446564, "step": 6780 }, { "epoch": 0.6913869804081528, "loss_breakdown/lm_loss": 8.789554158283863e-06, "loss_breakdown/pointer_loss": 0.10174502432346344, "step": 6780 }, { "epoch": 0.6913869804081528, "loss_breakdown/lm_loss": 9.405403034179471e-06, "loss_breakdown/pointer_loss": 0.34792810678482056, "step": 6780 }, { "epoch": 0.6913869804081528, "loss_breakdown/lm_loss": 6.341863809211645e-06, "loss_breakdown/pointer_loss": 0.9061861038208008, "step": 6780 }, { "epoch": 0.6913869804081528, "loss_breakdown/lm_loss": 6.127286724222358e-06, "loss_breakdown/pointer_loss": 0.16119250655174255, "step": 6780 }, { "epoch": 0.6913869804081528, "loss_breakdown/lm_loss": 1.1399898539821152e-05, "loss_breakdown/pointer_loss": 0.4837167263031006, "step": 6780 }, { "epoch": 0.6924067252170144, "grad_norm": 5.663777830628975, "learning_rate": 1.7093484419263457e-06, "loss": 0.2785, "step": 6790 }, { "epoch": 0.6924067252170144, "loss_breakdown/lm_loss": 6.552968443429563e-06, "loss_breakdown/pointer_loss": 0.6332523822784424, "step": 6790 }, { "epoch": 0.6924067252170144, "loss_breakdown/lm_loss": 5.818408681079745e-06, "loss_breakdown/pointer_loss": 0.3681833744049072, "step": 6790 }, { "epoch": 0.6924067252170144, "loss_breakdown/lm_loss": 5.435892944660736e-06, "loss_breakdown/pointer_loss": 0.18384280800819397, "step": 6790 }, { "epoch": 0.6924067252170144, "loss_breakdown/lm_loss": 5.19530249221134e-06, "loss_breakdown/pointer_loss": 0.22858458757400513, "step": 6790 }, { "epoch": 0.6924067252170144, "loss_breakdown/lm_loss": 8.84366818354465e-06, "loss_breakdown/pointer_loss": 0.18701989948749542, "step": 6790 }, { "epoch": 0.6924067252170144, "loss_breakdown/lm_loss": 6.948442660359433e-06, "loss_breakdown/pointer_loss": 0.3719143867492676, "step": 6790 }, { "epoch": 0.6924067252170144, "loss_breakdown/lm_loss": 7.0976061579131056e-06, "loss_breakdown/pointer_loss": 0.7593026757240295, "step": 6790 }, { "epoch": 0.6924067252170144, "loss_breakdown/lm_loss": 7.726735020696651e-06, "loss_breakdown/pointer_loss": 0.5650732517242432, "step": 6790 }, { "epoch": 0.693426470025876, "grad_norm": 6.432817883723301, "learning_rate": 1.7036827195467424e-06, "loss": 0.288, "step": 6800 }, { "epoch": 0.693426470025876, "loss_breakdown/lm_loss": 2.0290628526709042e-05, "loss_breakdown/pointer_loss": 1.1853218078613281, "step": 6800 }, { "epoch": 0.693426470025876, "loss_breakdown/lm_loss": 9.04783792066155e-06, "loss_breakdown/pointer_loss": 0.2610948085784912, "step": 6800 }, { "epoch": 0.693426470025876, "loss_breakdown/lm_loss": 1.048431295203045e-05, "loss_breakdown/pointer_loss": 0.6210165023803711, "step": 6800 }, { "epoch": 0.693426470025876, "loss_breakdown/lm_loss": 6.6529601099318825e-06, "loss_breakdown/pointer_loss": 0.9441723227500916, "step": 6800 }, { "epoch": 0.693426470025876, "loss_breakdown/lm_loss": 1.1848534086311702e-05, "loss_breakdown/pointer_loss": 0.5027092695236206, "step": 6800 }, { "epoch": 0.693426470025876, "loss_breakdown/lm_loss": 9.831136594584677e-06, "loss_breakdown/pointer_loss": 0.8145928382873535, "step": 6800 }, { "epoch": 0.693426470025876, "loss_breakdown/lm_loss": 7.947997801238671e-06, "loss_breakdown/pointer_loss": 0.9602802395820618, "step": 6800 }, { "epoch": 0.693426470025876, "loss_breakdown/lm_loss": 1.1274269127170555e-05, "loss_breakdown/pointer_loss": 0.4595752954483032, "step": 6800 }, { "epoch": 0.6944462148347376, "grad_norm": 3.1159281260005094, "learning_rate": 1.6980169971671389e-06, "loss": 0.2571, "step": 6810 }, { "epoch": 0.6944462148347376, "loss_breakdown/lm_loss": 1.9457984308246523e-05, "loss_breakdown/pointer_loss": 1.656602382659912, "step": 6810 }, { "epoch": 0.6944462148347376, "loss_breakdown/lm_loss": 5.33853972228826e-06, "loss_breakdown/pointer_loss": 0.7648419141769409, "step": 6810 }, { "epoch": 0.6944462148347376, "loss_breakdown/lm_loss": 7.386412562482292e-06, "loss_breakdown/pointer_loss": 0.4912620782852173, "step": 6810 }, { "epoch": 0.6944462148347376, "loss_breakdown/lm_loss": 5.7966908570961095e-06, "loss_breakdown/pointer_loss": 1.0332213640213013, "step": 6810 }, { "epoch": 0.6944462148347376, "loss_breakdown/lm_loss": 7.1604058575758245e-06, "loss_breakdown/pointer_loss": 0.2041076421737671, "step": 6810 }, { "epoch": 0.6944462148347376, "loss_breakdown/lm_loss": 8.682231964485254e-06, "loss_breakdown/pointer_loss": 0.3011801838874817, "step": 6810 }, { "epoch": 0.6944462148347376, "loss_breakdown/lm_loss": 6.408607077901252e-06, "loss_breakdown/pointer_loss": 0.1401454210281372, "step": 6810 }, { "epoch": 0.6944462148347376, "loss_breakdown/lm_loss": 4.970976533513749e-06, "loss_breakdown/pointer_loss": 0.10673093795776367, "step": 6810 }, { "epoch": 0.6954659596435991, "grad_norm": 18.492233256058757, "learning_rate": 1.6923512747875356e-06, "loss": 0.297, "step": 6820 }, { "epoch": 0.6954659596435991, "loss_breakdown/lm_loss": 5.867097570444457e-06, "loss_breakdown/pointer_loss": 0.27568313479423523, "step": 6820 }, { "epoch": 0.6954659596435991, "loss_breakdown/lm_loss": 6.799765287723858e-06, "loss_breakdown/pointer_loss": 0.263065367937088, "step": 6820 }, { "epoch": 0.6954659596435991, "loss_breakdown/lm_loss": 9.326123290520627e-06, "loss_breakdown/pointer_loss": 0.936659038066864, "step": 6820 }, { "epoch": 0.6954659596435991, "loss_breakdown/lm_loss": 6.877099167468259e-06, "loss_breakdown/pointer_loss": 0.5041297078132629, "step": 6820 }, { "epoch": 0.6954659596435991, "loss_breakdown/lm_loss": 6.620725798711646e-06, "loss_breakdown/pointer_loss": 0.34931373596191406, "step": 6820 }, { "epoch": 0.6954659596435991, "loss_breakdown/lm_loss": 5.344288638298167e-06, "loss_breakdown/pointer_loss": 0.8693963289260864, "step": 6820 }, { "epoch": 0.6954659596435991, "loss_breakdown/lm_loss": 6.330802989396034e-06, "loss_breakdown/pointer_loss": 0.36853742599487305, "step": 6820 }, { "epoch": 0.6954659596435991, "loss_breakdown/lm_loss": 9.186295756080654e-06, "loss_breakdown/pointer_loss": 1.1544406414031982, "step": 6820 }, { "epoch": 0.6964857044524607, "grad_norm": 26.64596851303513, "learning_rate": 1.6866855524079321e-06, "loss": 0.2629, "step": 6830 }, { "epoch": 0.6964857044524607, "loss_breakdown/lm_loss": 7.28353143131244e-06, "loss_breakdown/pointer_loss": 0.057344093918800354, "step": 6830 }, { "epoch": 0.6964857044524607, "loss_breakdown/lm_loss": 6.302132987912046e-06, "loss_breakdown/pointer_loss": 0.4537302553653717, "step": 6830 }, { "epoch": 0.6964857044524607, "loss_breakdown/lm_loss": 7.001460744504584e-06, "loss_breakdown/pointer_loss": 0.1580812931060791, "step": 6830 }, { "epoch": 0.6964857044524607, "loss_breakdown/lm_loss": 6.572327947651502e-06, "loss_breakdown/pointer_loss": 0.4829385280609131, "step": 6830 }, { "epoch": 0.6964857044524607, "loss_breakdown/lm_loss": 6.397497600119095e-06, "loss_breakdown/pointer_loss": 0.5158173441886902, "step": 6830 }, { "epoch": 0.6964857044524607, "loss_breakdown/lm_loss": 1.0982829735439736e-05, "loss_breakdown/pointer_loss": 0.462444543838501, "step": 6830 }, { "epoch": 0.6964857044524607, "loss_breakdown/lm_loss": 6.739203399774851e-06, "loss_breakdown/pointer_loss": 0.12479856610298157, "step": 6830 }, { "epoch": 0.6964857044524607, "loss_breakdown/lm_loss": 8.20542118162848e-06, "loss_breakdown/pointer_loss": 2.3221726417541504, "step": 6830 }, { "epoch": 0.6975054492613224, "grad_norm": 2.4643669898476537, "learning_rate": 1.6810198300283286e-06, "loss": 0.272, "step": 6840 }, { "epoch": 0.6975054492613224, "loss_breakdown/lm_loss": 5.450646312965546e-06, "loss_breakdown/pointer_loss": 0.47950226068496704, "step": 6840 }, { "epoch": 0.6975054492613224, "loss_breakdown/lm_loss": 7.019124041107716e-06, "loss_breakdown/pointer_loss": 0.4953722357749939, "step": 6840 }, { "epoch": 0.6975054492613224, "loss_breakdown/lm_loss": 5.0981275308004115e-06, "loss_breakdown/pointer_loss": 0.22284135222434998, "step": 6840 }, { "epoch": 0.6975054492613224, "loss_breakdown/lm_loss": 6.819286227255361e-06, "loss_breakdown/pointer_loss": 0.41967833042144775, "step": 6840 }, { "epoch": 0.6975054492613224, "loss_breakdown/lm_loss": 6.983708772168029e-06, "loss_breakdown/pointer_loss": 0.30094432830810547, "step": 6840 }, { "epoch": 0.6975054492613224, "loss_breakdown/lm_loss": 5.062372565589612e-06, "loss_breakdown/pointer_loss": 0.09969767928123474, "step": 6840 }, { "epoch": 0.6975054492613224, "loss_breakdown/lm_loss": 5.474033059726935e-06, "loss_breakdown/pointer_loss": 0.4380146563053131, "step": 6840 }, { "epoch": 0.6975054492613224, "loss_breakdown/lm_loss": 6.796446086809738e-06, "loss_breakdown/pointer_loss": 1.109081745147705, "step": 6840 }, { "epoch": 0.698525194070184, "grad_norm": 20.112933897399074, "learning_rate": 1.6753541076487253e-06, "loss": 0.2485, "step": 6850 }, { "epoch": 0.698525194070184, "loss_breakdown/lm_loss": 3.4898264857474715e-05, "loss_breakdown/pointer_loss": 2.3761978149414062, "step": 6850 }, { "epoch": 0.698525194070184, "loss_breakdown/lm_loss": 1.2648036317841616e-05, "loss_breakdown/pointer_loss": 1.533920407295227, "step": 6850 }, { "epoch": 0.698525194070184, "loss_breakdown/lm_loss": 9.756176041264553e-06, "loss_breakdown/pointer_loss": 1.193946361541748, "step": 6850 }, { "epoch": 0.698525194070184, "loss_breakdown/lm_loss": 8.386576155317016e-06, "loss_breakdown/pointer_loss": 0.6209121942520142, "step": 6850 }, { "epoch": 0.698525194070184, "loss_breakdown/lm_loss": 1.135017373599112e-05, "loss_breakdown/pointer_loss": 0.5027146339416504, "step": 6850 }, { "epoch": 0.698525194070184, "loss_breakdown/lm_loss": 5.02981174577144e-06, "loss_breakdown/pointer_loss": 0.6637303829193115, "step": 6850 }, { "epoch": 0.698525194070184, "loss_breakdown/lm_loss": 9.412417057319544e-06, "loss_breakdown/pointer_loss": 0.17335966229438782, "step": 6850 }, { "epoch": 0.698525194070184, "loss_breakdown/lm_loss": 7.466064744221512e-06, "loss_breakdown/pointer_loss": 0.2958258390426636, "step": 6850 }, { "epoch": 0.6995449388790456, "grad_norm": 15.9662081864842, "learning_rate": 1.669688385269122e-06, "loss": 0.2693, "step": 6860 }, { "epoch": 0.6995449388790456, "loss_breakdown/lm_loss": 5.370327016862575e-06, "loss_breakdown/pointer_loss": 1.0760060548782349, "step": 6860 }, { "epoch": 0.6995449388790456, "loss_breakdown/lm_loss": 5.612143468169961e-06, "loss_breakdown/pointer_loss": 0.12920820713043213, "step": 6860 }, { "epoch": 0.6995449388790456, "loss_breakdown/lm_loss": 1.5295834600692615e-05, "loss_breakdown/pointer_loss": 0.12093275785446167, "step": 6860 }, { "epoch": 0.6995449388790456, "loss_breakdown/lm_loss": 8.459717719233595e-06, "loss_breakdown/pointer_loss": 0.6787343621253967, "step": 6860 }, { "epoch": 0.6995449388790456, "loss_breakdown/lm_loss": 9.711341590445954e-06, "loss_breakdown/pointer_loss": 0.48561766743659973, "step": 6860 }, { "epoch": 0.6995449388790456, "loss_breakdown/lm_loss": 7.17234615876805e-06, "loss_breakdown/pointer_loss": 0.1463325023651123, "step": 6860 }, { "epoch": 0.6995449388790456, "loss_breakdown/lm_loss": 5.45178727406892e-06, "loss_breakdown/pointer_loss": 0.12950360774993896, "step": 6860 }, { "epoch": 0.6995449388790456, "loss_breakdown/lm_loss": 6.298143489402719e-06, "loss_breakdown/pointer_loss": 0.08112268894910812, "step": 6860 }, { "epoch": 0.7005646836879071, "grad_norm": 4.591873812658646, "learning_rate": 1.6640226628895183e-06, "loss": 0.2941, "step": 6870 }, { "epoch": 0.7005646836879071, "loss_breakdown/lm_loss": 7.586522769997828e-06, "loss_breakdown/pointer_loss": 0.6597709059715271, "step": 6870 }, { "epoch": 0.7005646836879071, "loss_breakdown/lm_loss": 4.939919563184958e-06, "loss_breakdown/pointer_loss": 0.30623000860214233, "step": 6870 }, { "epoch": 0.7005646836879071, "loss_breakdown/lm_loss": 5.6266012506966945e-06, "loss_breakdown/pointer_loss": 0.41413888335227966, "step": 6870 }, { "epoch": 0.7005646836879071, "loss_breakdown/lm_loss": 5.9126932683284394e-06, "loss_breakdown/pointer_loss": 0.2559514045715332, "step": 6870 }, { "epoch": 0.7005646836879071, "loss_breakdown/lm_loss": 8.130369678838179e-06, "loss_breakdown/pointer_loss": 0.5619498491287231, "step": 6870 }, { "epoch": 0.7005646836879071, "loss_breakdown/lm_loss": 6.595726063096663e-06, "loss_breakdown/pointer_loss": 0.13454195857048035, "step": 6870 }, { "epoch": 0.7005646836879071, "loss_breakdown/lm_loss": 7.247788744280115e-06, "loss_breakdown/pointer_loss": 0.700074315071106, "step": 6870 }, { "epoch": 0.7005646836879071, "loss_breakdown/lm_loss": 5.466326911118813e-06, "loss_breakdown/pointer_loss": 0.5022464394569397, "step": 6870 }, { "epoch": 0.7015844284967687, "grad_norm": 7.533559022914552, "learning_rate": 1.658356940509915e-06, "loss": 0.2507, "step": 6880 }, { "epoch": 0.7015844284967687, "loss_breakdown/lm_loss": 7.955109140311833e-06, "loss_breakdown/pointer_loss": 0.1734006106853485, "step": 6880 }, { "epoch": 0.7015844284967687, "loss_breakdown/lm_loss": 1.3243693501863163e-05, "loss_breakdown/pointer_loss": 0.31861865520477295, "step": 6880 }, { "epoch": 0.7015844284967687, "loss_breakdown/lm_loss": 7.100776656443486e-06, "loss_breakdown/pointer_loss": 0.4641225039958954, "step": 6880 }, { "epoch": 0.7015844284967687, "loss_breakdown/lm_loss": 4.990846264263382e-06, "loss_breakdown/pointer_loss": 0.04086354374885559, "step": 6880 }, { "epoch": 0.7015844284967687, "loss_breakdown/lm_loss": 6.786902758904034e-06, "loss_breakdown/pointer_loss": 0.11279594898223877, "step": 6880 }, { "epoch": 0.7015844284967687, "loss_breakdown/lm_loss": 6.798800313845277e-06, "loss_breakdown/pointer_loss": 0.042452000081539154, "step": 6880 }, { "epoch": 0.7015844284967687, "loss_breakdown/lm_loss": 8.193532266886905e-06, "loss_breakdown/pointer_loss": 0.5288321375846863, "step": 6880 }, { "epoch": 0.7015844284967687, "loss_breakdown/lm_loss": 1.3080903954687528e-05, "loss_breakdown/pointer_loss": 0.15019097924232483, "step": 6880 }, { "epoch": 0.7026041733056303, "grad_norm": 8.150999492242276, "learning_rate": 1.6526912181303118e-06, "loss": 0.2616, "step": 6890 }, { "epoch": 0.7026041733056303, "loss_breakdown/lm_loss": 8.442838407063391e-06, "loss_breakdown/pointer_loss": 0.34544917941093445, "step": 6890 }, { "epoch": 0.7026041733056303, "loss_breakdown/lm_loss": 7.10739095666213e-06, "loss_breakdown/pointer_loss": 0.26703178882598877, "step": 6890 }, { "epoch": 0.7026041733056303, "loss_breakdown/lm_loss": 6.843067239969969e-06, "loss_breakdown/pointer_loss": 0.11853668838739395, "step": 6890 }, { "epoch": 0.7026041733056303, "loss_breakdown/lm_loss": 6.367942660290282e-06, "loss_breakdown/pointer_loss": 0.07168406993150711, "step": 6890 }, { "epoch": 0.7026041733056303, "loss_breakdown/lm_loss": 5.689206318493234e-06, "loss_breakdown/pointer_loss": 0.2562839388847351, "step": 6890 }, { "epoch": 0.7026041733056303, "loss_breakdown/lm_loss": 6.681320428469917e-06, "loss_breakdown/pointer_loss": 0.21785210072994232, "step": 6890 }, { "epoch": 0.7026041733056303, "loss_breakdown/lm_loss": 8.762291145103518e-06, "loss_breakdown/pointer_loss": 0.24194064736366272, "step": 6890 }, { "epoch": 0.7026041733056303, "loss_breakdown/lm_loss": 1.1531077689141966e-05, "loss_breakdown/pointer_loss": 0.5983878970146179, "step": 6890 }, { "epoch": 0.7036239181144919, "grad_norm": 6.133744619476631, "learning_rate": 1.6470254957507085e-06, "loss": 0.2577, "step": 6900 }, { "epoch": 0.7036239181144919, "loss_breakdown/lm_loss": 2.462540396663826e-05, "loss_breakdown/pointer_loss": 1.2057123184204102, "step": 6900 }, { "epoch": 0.7036239181144919, "loss_breakdown/lm_loss": 1.529415931145195e-05, "loss_breakdown/pointer_loss": 0.46047472953796387, "step": 6900 }, { "epoch": 0.7036239181144919, "loss_breakdown/lm_loss": 1.3020057849644218e-05, "loss_breakdown/pointer_loss": 0.9340707063674927, "step": 6900 }, { "epoch": 0.7036239181144919, "loss_breakdown/lm_loss": 1.0916792234638706e-05, "loss_breakdown/pointer_loss": 1.084773302078247, "step": 6900 }, { "epoch": 0.7036239181144919, "loss_breakdown/lm_loss": 4.7146848373813555e-05, "loss_breakdown/pointer_loss": 0.3078243136405945, "step": 6900 }, { "epoch": 0.7036239181144919, "loss_breakdown/lm_loss": 8.24332073534606e-06, "loss_breakdown/pointer_loss": 0.2696281969547272, "step": 6900 }, { "epoch": 0.7036239181144919, "loss_breakdown/lm_loss": 7.460147116944427e-06, "loss_breakdown/pointer_loss": 0.3312670588493347, "step": 6900 }, { "epoch": 0.7036239181144919, "loss_breakdown/lm_loss": 8.259965397883207e-06, "loss_breakdown/pointer_loss": 0.20901793241500854, "step": 6900 }, { "epoch": 0.7046436629233535, "grad_norm": 2.9798241589261614, "learning_rate": 1.641359773371105e-06, "loss": 0.2581, "step": 6910 }, { "epoch": 0.7046436629233535, "loss_breakdown/lm_loss": 5.972763119643787e-06, "loss_breakdown/pointer_loss": 0.19556748867034912, "step": 6910 }, { "epoch": 0.7046436629233535, "loss_breakdown/lm_loss": 5.339645213098265e-06, "loss_breakdown/pointer_loss": 0.2612612247467041, "step": 6910 }, { "epoch": 0.7046436629233535, "loss_breakdown/lm_loss": 6.989169378357474e-06, "loss_breakdown/pointer_loss": 0.11320852488279343, "step": 6910 }, { "epoch": 0.7046436629233535, "loss_breakdown/lm_loss": 5.8093955885851756e-06, "loss_breakdown/pointer_loss": 0.39514511823654175, "step": 6910 }, { "epoch": 0.7046436629233535, "loss_breakdown/lm_loss": 7.164365342759993e-06, "loss_breakdown/pointer_loss": 0.12014477699995041, "step": 6910 }, { "epoch": 0.7046436629233535, "loss_breakdown/lm_loss": 5.1219790293544065e-06, "loss_breakdown/pointer_loss": 0.07907669246196747, "step": 6910 }, { "epoch": 0.7046436629233535, "loss_breakdown/lm_loss": 7.77234981796937e-06, "loss_breakdown/pointer_loss": 0.051246773451566696, "step": 6910 }, { "epoch": 0.7046436629233535, "loss_breakdown/lm_loss": 8.884920134732965e-06, "loss_breakdown/pointer_loss": 0.40898844599723816, "step": 6910 }, { "epoch": 0.705663407732215, "grad_norm": 3.153382175468332, "learning_rate": 1.6356940509915015e-06, "loss": 0.2746, "step": 6920 }, { "epoch": 0.705663407732215, "loss_breakdown/lm_loss": 8.087688911473379e-06, "loss_breakdown/pointer_loss": 0.5052194595336914, "step": 6920 }, { "epoch": 0.705663407732215, "loss_breakdown/lm_loss": 5.806753506476525e-06, "loss_breakdown/pointer_loss": 0.3256149888038635, "step": 6920 }, { "epoch": 0.705663407732215, "loss_breakdown/lm_loss": 6.942794243514072e-06, "loss_breakdown/pointer_loss": 0.37144526839256287, "step": 6920 }, { "epoch": 0.705663407732215, "loss_breakdown/lm_loss": 8.70412895892514e-06, "loss_breakdown/pointer_loss": 0.24239318072795868, "step": 6920 }, { "epoch": 0.705663407732215, "loss_breakdown/lm_loss": 7.725332579866517e-06, "loss_breakdown/pointer_loss": 0.37955909967422485, "step": 6920 }, { "epoch": 0.705663407732215, "loss_breakdown/lm_loss": 1.041695759340655e-05, "loss_breakdown/pointer_loss": 0.2558469772338867, "step": 6920 }, { "epoch": 0.705663407732215, "loss_breakdown/lm_loss": 5.539198355108965e-06, "loss_breakdown/pointer_loss": 0.2832711338996887, "step": 6920 }, { "epoch": 0.705663407732215, "loss_breakdown/lm_loss": 5.886825420020614e-06, "loss_breakdown/pointer_loss": 0.7154697775840759, "step": 6920 }, { "epoch": 0.7066831525410766, "grad_norm": 4.603721539857981, "learning_rate": 1.6300283286118982e-06, "loss": 0.2489, "step": 6930 }, { "epoch": 0.7066831525410766, "loss_breakdown/lm_loss": 6.500787549157394e-06, "loss_breakdown/pointer_loss": 1.033817172050476, "step": 6930 }, { "epoch": 0.7066831525410766, "loss_breakdown/lm_loss": 6.576309260708513e-06, "loss_breakdown/pointer_loss": 0.6345261931419373, "step": 6930 }, { "epoch": 0.7066831525410766, "loss_breakdown/lm_loss": 6.73123076921911e-06, "loss_breakdown/pointer_loss": 2.4150044918060303, "step": 6930 }, { "epoch": 0.7066831525410766, "loss_breakdown/lm_loss": 6.389518603100441e-06, "loss_breakdown/pointer_loss": 0.40641576051712036, "step": 6930 }, { "epoch": 0.7066831525410766, "loss_breakdown/lm_loss": 6.377599675033707e-06, "loss_breakdown/pointer_loss": 0.44004136323928833, "step": 6930 }, { "epoch": 0.7066831525410766, "loss_breakdown/lm_loss": 4.661044386011781e-06, "loss_breakdown/pointer_loss": 0.04808422923088074, "step": 6930 }, { "epoch": 0.7066831525410766, "loss_breakdown/lm_loss": 5.074296495877206e-06, "loss_breakdown/pointer_loss": 0.28011369705200195, "step": 6930 }, { "epoch": 0.7066831525410766, "loss_breakdown/lm_loss": 5.567018888541497e-06, "loss_breakdown/pointer_loss": 0.23241575062274933, "step": 6930 }, { "epoch": 0.7077028973499382, "grad_norm": 8.052078560064928, "learning_rate": 1.6243626062322947e-06, "loss": 0.2785, "step": 6940 }, { "epoch": 0.7077028973499382, "loss_breakdown/lm_loss": 8.073888238868676e-06, "loss_breakdown/pointer_loss": 0.20163920521736145, "step": 6940 }, { "epoch": 0.7077028973499382, "loss_breakdown/lm_loss": 8.192899258574471e-06, "loss_breakdown/pointer_loss": 0.09656782448291779, "step": 6940 }, { "epoch": 0.7077028973499382, "loss_breakdown/lm_loss": 6.035048045305302e-06, "loss_breakdown/pointer_loss": 0.21286964416503906, "step": 6940 }, { "epoch": 0.7077028973499382, "loss_breakdown/lm_loss": 6.73124122840818e-06, "loss_breakdown/pointer_loss": 0.08515097200870514, "step": 6940 }, { "epoch": 0.7077028973499382, "loss_breakdown/lm_loss": 5.263701496005524e-06, "loss_breakdown/pointer_loss": 0.43939414620399475, "step": 6940 }, { "epoch": 0.7077028973499382, "loss_breakdown/lm_loss": 4.916677426081151e-06, "loss_breakdown/pointer_loss": 0.1547480970621109, "step": 6940 }, { "epoch": 0.7077028973499382, "loss_breakdown/lm_loss": 6.4167661548708566e-06, "loss_breakdown/pointer_loss": 0.1328335702419281, "step": 6940 }, { "epoch": 0.7077028973499382, "loss_breakdown/lm_loss": 7.12702831151546e-06, "loss_breakdown/pointer_loss": 0.1245628148317337, "step": 6940 }, { "epoch": 0.7087226421587998, "grad_norm": 8.724095974351403, "learning_rate": 1.6186968838526914e-06, "loss": 0.2675, "step": 6950 }, { "epoch": 0.7087226421587998, "loss_breakdown/lm_loss": 3.65402374882251e-05, "loss_breakdown/pointer_loss": 2.4037108421325684, "step": 6950 }, { "epoch": 0.7087226421587998, "loss_breakdown/lm_loss": 1.0959842256852426e-05, "loss_breakdown/pointer_loss": 1.0436878204345703, "step": 6950 }, { "epoch": 0.7087226421587998, "loss_breakdown/lm_loss": 4.084876854903996e-05, "loss_breakdown/pointer_loss": 1.0269412994384766, "step": 6950 }, { "epoch": 0.7087226421587998, "loss_breakdown/lm_loss": 1.0926183676929213e-05, "loss_breakdown/pointer_loss": 0.5932062864303589, "step": 6950 }, { "epoch": 0.7087226421587998, "loss_breakdown/lm_loss": 8.344484740518965e-06, "loss_breakdown/pointer_loss": 0.806380033493042, "step": 6950 }, { "epoch": 0.7087226421587998, "loss_breakdown/lm_loss": 1.334042099188082e-05, "loss_breakdown/pointer_loss": 0.36146169900894165, "step": 6950 }, { "epoch": 0.7087226421587998, "loss_breakdown/lm_loss": 8.970760063675698e-06, "loss_breakdown/pointer_loss": 0.6246650218963623, "step": 6950 }, { "epoch": 0.7087226421587998, "loss_breakdown/lm_loss": 7.327060302486643e-06, "loss_breakdown/pointer_loss": 1.349010944366455, "step": 6950 }, { "epoch": 0.7097423869676613, "grad_norm": 4.461739529759893, "learning_rate": 1.613031161473088e-06, "loss": 0.2457, "step": 6960 }, { "epoch": 0.7097423869676613, "loss_breakdown/lm_loss": 6.0822621890110895e-06, "loss_breakdown/pointer_loss": 0.17233772575855255, "step": 6960 }, { "epoch": 0.7097423869676613, "loss_breakdown/lm_loss": 5.303442321746843e-06, "loss_breakdown/pointer_loss": 0.10557244718074799, "step": 6960 }, { "epoch": 0.7097423869676613, "loss_breakdown/lm_loss": 5.299191343510756e-06, "loss_breakdown/pointer_loss": 0.057019688189029694, "step": 6960 }, { "epoch": 0.7097423869676613, "loss_breakdown/lm_loss": 6.680410024273442e-06, "loss_breakdown/pointer_loss": 0.2682267725467682, "step": 6960 }, { "epoch": 0.7097423869676613, "loss_breakdown/lm_loss": 7.127027402020758e-06, "loss_breakdown/pointer_loss": 0.07478195428848267, "step": 6960 }, { "epoch": 0.7097423869676613, "loss_breakdown/lm_loss": 5.602780220215209e-06, "loss_breakdown/pointer_loss": 0.08858191967010498, "step": 6960 }, { "epoch": 0.7097423869676613, "loss_breakdown/lm_loss": 7.04121021044557e-06, "loss_breakdown/pointer_loss": 0.37137919664382935, "step": 6960 }, { "epoch": 0.7097423869676613, "loss_breakdown/lm_loss": 4.800121132575441e-06, "loss_breakdown/pointer_loss": 0.3672238886356354, "step": 6960 }, { "epoch": 0.7107621317765229, "grad_norm": 6.834600851803197, "learning_rate": 1.6073654390934844e-06, "loss": 0.2792, "step": 6970 }, { "epoch": 0.7107621317765229, "loss_breakdown/lm_loss": 8.160243851307314e-06, "loss_breakdown/pointer_loss": 0.4433099031448364, "step": 6970 }, { "epoch": 0.7107621317765229, "loss_breakdown/lm_loss": 5.3362355174613185e-06, "loss_breakdown/pointer_loss": 0.24888505041599274, "step": 6970 }, { "epoch": 0.7107621317765229, "loss_breakdown/lm_loss": 6.267895514611155e-06, "loss_breakdown/pointer_loss": 0.3556783199310303, "step": 6970 }, { "epoch": 0.7107621317765229, "loss_breakdown/lm_loss": 5.1475212785589974e-06, "loss_breakdown/pointer_loss": 0.1487899124622345, "step": 6970 }, { "epoch": 0.7107621317765229, "loss_breakdown/lm_loss": 6.6023626459355e-06, "loss_breakdown/pointer_loss": 0.4144626259803772, "step": 6970 }, { "epoch": 0.7107621317765229, "loss_breakdown/lm_loss": 9.384072654938791e-06, "loss_breakdown/pointer_loss": 0.23094269633293152, "step": 6970 }, { "epoch": 0.7107621317765229, "loss_breakdown/lm_loss": 6.131548616394866e-06, "loss_breakdown/pointer_loss": 0.3012799620628357, "step": 6970 }, { "epoch": 0.7107621317765229, "loss_breakdown/lm_loss": 5.11922098667128e-06, "loss_breakdown/pointer_loss": 0.4456738233566284, "step": 6970 }, { "epoch": 0.7117818765853845, "grad_norm": 9.13663839774553, "learning_rate": 1.6016997167138811e-06, "loss": 0.2591, "step": 6980 }, { "epoch": 0.7117818765853845, "loss_breakdown/lm_loss": 7.990869562490843e-06, "loss_breakdown/pointer_loss": 4.844991683959961, "step": 6980 }, { "epoch": 0.7117818765853845, "loss_breakdown/lm_loss": 8.858418368618004e-06, "loss_breakdown/pointer_loss": 0.16373750567436218, "step": 6980 }, { "epoch": 0.7117818765853845, "loss_breakdown/lm_loss": 4.756407179229427e-06, "loss_breakdown/pointer_loss": 0.0760197713971138, "step": 6980 }, { "epoch": 0.7117818765853845, "loss_breakdown/lm_loss": 5.582908215728821e-06, "loss_breakdown/pointer_loss": 0.06609394401311874, "step": 6980 }, { "epoch": 0.7117818765853845, "loss_breakdown/lm_loss": 7.609410658915294e-06, "loss_breakdown/pointer_loss": 2.733686923980713, "step": 6980 }, { "epoch": 0.7117818765853845, "loss_breakdown/lm_loss": 1.1399932191125117e-05, "loss_breakdown/pointer_loss": 0.6776975393295288, "step": 6980 }, { "epoch": 0.7117818765853845, "loss_breakdown/lm_loss": 7.649160579603631e-06, "loss_breakdown/pointer_loss": 0.3397044837474823, "step": 6980 }, { "epoch": 0.7117818765853845, "loss_breakdown/lm_loss": 8.888767297321465e-06, "loss_breakdown/pointer_loss": 2.2298104763031006, "step": 6980 }, { "epoch": 0.7128016213942461, "grad_norm": 8.256109769738469, "learning_rate": 1.5960339943342779e-06, "loss": 0.2676, "step": 6990 }, { "epoch": 0.7128016213942461, "loss_breakdown/lm_loss": 8.925112524593715e-06, "loss_breakdown/pointer_loss": 0.09793566167354584, "step": 6990 }, { "epoch": 0.7128016213942461, "loss_breakdown/lm_loss": 6.94645586918341e-06, "loss_breakdown/pointer_loss": 0.1705627143383026, "step": 6990 }, { "epoch": 0.7128016213942461, "loss_breakdown/lm_loss": 9.225602298101876e-06, "loss_breakdown/pointer_loss": 0.39790815114974976, "step": 6990 }, { "epoch": 0.7128016213942461, "loss_breakdown/lm_loss": 7.1792305789131206e-06, "loss_breakdown/pointer_loss": 0.2572145462036133, "step": 6990 }, { "epoch": 0.7128016213942461, "loss_breakdown/lm_loss": 5.959167083346983e-06, "loss_breakdown/pointer_loss": 0.12657541036605835, "step": 6990 }, { "epoch": 0.7128016213942461, "loss_breakdown/lm_loss": 5.67209099244792e-06, "loss_breakdown/pointer_loss": 0.2982724905014038, "step": 6990 }, { "epoch": 0.7128016213942461, "loss_breakdown/lm_loss": 9.27550809137756e-06, "loss_breakdown/pointer_loss": 3.622147798538208, "step": 6990 }, { "epoch": 0.7128016213942461, "loss_breakdown/lm_loss": 5.145812338014366e-06, "loss_breakdown/pointer_loss": 0.6152721643447876, "step": 6990 }, { "epoch": 0.7138213662031077, "grad_norm": 7.351296962748111, "learning_rate": 1.5903682719546742e-06, "loss": 0.2794, "step": 7000 } ], "logging_steps": 10, "max_steps": 9806, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.5595570038927524e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }