{ "best_metric": 0.17041701078414917, "best_model_checkpoint": "/home/users/jomoll/dev/SRREval/output/models_tuned/roberta-PM-6/checkpoint-9947", "epoch": 10.0, "eval_steps": 500, "global_step": 14210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03518648838845883, "grad_norm": 6.621465682983398, "learning_rate": 7.032348804500703e-06, "loss": 9.6643, "step": 50 }, { "epoch": 0.07037297677691766, "grad_norm": 4.116970539093018, "learning_rate": 1.4064697609001406e-05, "loss": 6.6797, "step": 100 }, { "epoch": 0.1055594651653765, "grad_norm": 4.148292541503906, "learning_rate": 2.1097046413502112e-05, "loss": 5.4359, "step": 150 }, { "epoch": 0.14074595355383532, "grad_norm": 5.338364124298096, "learning_rate": 2.8129395218002813e-05, "loss": 4.5515, "step": 200 }, { "epoch": 0.17593244194229415, "grad_norm": 4.0342230796813965, "learning_rate": 3.516174402250352e-05, "loss": 3.2182, "step": 250 }, { "epoch": 0.211118930330753, "grad_norm": 3.043982744216919, "learning_rate": 4.2194092827004224e-05, "loss": 2.1068, "step": 300 }, { "epoch": 0.24630541871921183, "grad_norm": 2.9319007396698, "learning_rate": 4.9226441631504925e-05, "loss": 1.5306, "step": 350 }, { "epoch": 0.28149190710767064, "grad_norm": 2.049384593963623, "learning_rate": 5.6258790436005626e-05, "loss": 1.2641, "step": 400 }, { "epoch": 0.3166783954961295, "grad_norm": 1.8761528730392456, "learning_rate": 6.329113924050633e-05, "loss": 1.1155, "step": 450 }, { "epoch": 0.3518648838845883, "grad_norm": 1.4291869401931763, "learning_rate": 7.032348804500703e-05, "loss": 1.0073, "step": 500 }, { "epoch": 0.38705137227304715, "grad_norm": 1.9881365299224854, "learning_rate": 7.735583684950773e-05, "loss": 0.928, "step": 550 }, { "epoch": 0.422237860661506, "grad_norm": 1.5513331890106201, "learning_rate": 8.438818565400845e-05, "loss": 0.8684, "step": 600 }, { "epoch": 0.4574243490499648, "grad_norm": 1.2779691219329834, "learning_rate": 9.142053445850915e-05, "loss": 0.8177, "step": 650 }, { "epoch": 0.49261083743842365, "grad_norm": 1.4252148866653442, "learning_rate": 9.845288326300985e-05, "loss": 0.7673, "step": 700 }, { "epoch": 0.5277973258268824, "grad_norm": 1.2600018978118896, "learning_rate": 9.999794049527833e-05, "loss": 0.7115, "step": 750 }, { "epoch": 0.5629838142153413, "grad_norm": 1.2635945081710815, "learning_rate": 9.998927490751507e-05, "loss": 0.6504, "step": 800 }, { "epoch": 0.5981703026038001, "grad_norm": 1.3629568815231323, "learning_rate": 9.997384056940449e-05, "loss": 0.5944, "step": 850 }, { "epoch": 0.633356790992259, "grad_norm": 1.1627200841903687, "learning_rate": 9.99516395708185e-05, "loss": 0.54, "step": 900 }, { "epoch": 0.6685432793807178, "grad_norm": 1.2502806186676025, "learning_rate": 9.992267491786226e-05, "loss": 0.4839, "step": 950 }, { "epoch": 0.7037297677691766, "grad_norm": 1.1342246532440186, "learning_rate": 9.988695053246715e-05, "loss": 0.4523, "step": 1000 }, { "epoch": 0.7389162561576355, "grad_norm": 0.949360191822052, "learning_rate": 9.984447125185967e-05, "loss": 0.4186, "step": 1050 }, { "epoch": 0.7741027445460943, "grad_norm": 1.0214606523513794, "learning_rate": 9.97952428279065e-05, "loss": 0.4013, "step": 1100 }, { "epoch": 0.8092892329345531, "grad_norm": 0.9650245904922485, "learning_rate": 9.973927192633566e-05, "loss": 0.3876, "step": 1150 }, { "epoch": 0.844475721323012, "grad_norm": 1.0073192119598389, "learning_rate": 9.967656612583405e-05, "loss": 0.3739, "step": 1200 }, { "epoch": 0.8796622097114708, "grad_norm": 0.8714594841003418, "learning_rate": 9.960713391702104e-05, "loss": 0.3586, "step": 1250 }, { "epoch": 0.9148486980999296, "grad_norm": 0.7691583633422852, "learning_rate": 9.953098470129903e-05, "loss": 0.3505, "step": 1300 }, { "epoch": 0.9500351864883885, "grad_norm": 0.7321134805679321, "learning_rate": 9.944812878958035e-05, "loss": 0.3403, "step": 1350 }, { "epoch": 0.9852216748768473, "grad_norm": 0.723590612411499, "learning_rate": 9.935857740089109e-05, "loss": 0.3321, "step": 1400 }, { "epoch": 1.0, "eval_loss": 0.4019390344619751, "eval_rouge1": 0.8305208489862652, "eval_rouge2": 0.6982755534483135, "eval_rougeL": 0.7552194334031939, "eval_runtime": 454.74, "eval_samples_per_second": 2.146, "eval_steps_per_second": 0.134, "step": 1421 }, { "epoch": 1.0204081632653061, "grad_norm": 0.7262958884239197, "learning_rate": 9.92623426608521e-05, "loss": 0.318, "step": 1450 }, { "epoch": 1.0555946516537649, "grad_norm": 0.9095492362976074, "learning_rate": 9.915943760003708e-05, "loss": 0.3094, "step": 1500 }, { "epoch": 1.0907811400422238, "grad_norm": 0.743803083896637, "learning_rate": 9.904987615220814e-05, "loss": 0.3079, "step": 1550 }, { "epoch": 1.1259676284306825, "grad_norm": 0.7167914509773254, "learning_rate": 9.893367315242921e-05, "loss": 0.3027, "step": 1600 }, { "epoch": 1.1611541168191415, "grad_norm": 0.9233067035675049, "learning_rate": 9.881084433505724e-05, "loss": 0.2954, "step": 1650 }, { "epoch": 1.1963406052076002, "grad_norm": 0.7146203517913818, "learning_rate": 9.868140633161171e-05, "loss": 0.2942, "step": 1700 }, { "epoch": 1.2315270935960592, "grad_norm": 0.7871883511543274, "learning_rate": 9.854537666852272e-05, "loss": 0.2882, "step": 1750 }, { "epoch": 1.266713581984518, "grad_norm": 0.6794448494911194, "learning_rate": 9.840277376475772e-05, "loss": 0.2861, "step": 1800 }, { "epoch": 1.3019000703729768, "grad_norm": 0.6964978575706482, "learning_rate": 9.825361692932765e-05, "loss": 0.2812, "step": 1850 }, { "epoch": 1.3370865587614356, "grad_norm": 0.6423263549804688, "learning_rate": 9.809792635867229e-05, "loss": 0.2811, "step": 1900 }, { "epoch": 1.3722730471498945, "grad_norm": 0.6637555360794067, "learning_rate": 9.793572313392567e-05, "loss": 0.2769, "step": 1950 }, { "epoch": 1.4074595355383532, "grad_norm": 0.6535862684249878, "learning_rate": 9.776702921806158e-05, "loss": 0.2751, "step": 2000 }, { "epoch": 1.4426460239268122, "grad_norm": 0.647319495677948, "learning_rate": 9.759186745291966e-05, "loss": 0.2734, "step": 2050 }, { "epoch": 1.477832512315271, "grad_norm": 0.6112367510795593, "learning_rate": 9.741026155611255e-05, "loss": 0.2686, "step": 2100 }, { "epoch": 1.5130190007037299, "grad_norm": 0.5943325757980347, "learning_rate": 9.722223611781444e-05, "loss": 0.2686, "step": 2150 }, { "epoch": 1.5482054890921886, "grad_norm": 0.601610541343689, "learning_rate": 9.702781659743143e-05, "loss": 0.2632, "step": 2200 }, { "epoch": 1.5833919774806473, "grad_norm": 0.6396769285202026, "learning_rate": 9.682702932015424e-05, "loss": 0.2645, "step": 2250 }, { "epoch": 1.6185784658691063, "grad_norm": 0.633762776851654, "learning_rate": 9.661990147339369e-05, "loss": 0.2602, "step": 2300 }, { "epoch": 1.6537649542575652, "grad_norm": 0.6024792194366455, "learning_rate": 9.640646110309937e-05, "loss": 0.2604, "step": 2350 }, { "epoch": 1.688951442646024, "grad_norm": 0.5546603202819824, "learning_rate": 9.618673710996213e-05, "loss": 0.257, "step": 2400 }, { "epoch": 1.7241379310344827, "grad_norm": 0.5851404070854187, "learning_rate": 9.596075924550086e-05, "loss": 0.2577, "step": 2450 }, { "epoch": 1.7593244194229416, "grad_norm": 0.5852994918823242, "learning_rate": 9.57285581080339e-05, "loss": 0.2535, "step": 2500 }, { "epoch": 1.7945109078114005, "grad_norm": 0.5260228514671326, "learning_rate": 9.549016513853598e-05, "loss": 0.2523, "step": 2550 }, { "epoch": 1.8296973961998593, "grad_norm": 0.53746098279953, "learning_rate": 9.524561261638103e-05, "loss": 0.2488, "step": 2600 }, { "epoch": 1.864883884588318, "grad_norm": 0.6284821629524231, "learning_rate": 9.499493365497128e-05, "loss": 0.249, "step": 2650 }, { "epoch": 1.900070372976777, "grad_norm": 0.5424715280532837, "learning_rate": 9.473816219725373e-05, "loss": 0.2481, "step": 2700 }, { "epoch": 1.935256861365236, "grad_norm": 0.8342245221138, "learning_rate": 9.447533301112401e-05, "loss": 0.2434, "step": 2750 }, { "epoch": 1.9704433497536946, "grad_norm": 0.5217240452766418, "learning_rate": 9.420648168471875e-05, "loss": 0.2463, "step": 2800 }, { "epoch": 2.0, "eval_loss": 0.21821193397045135, "eval_rouge1": 0.8525383067193701, "eval_rouge2": 0.7310954803336809, "eval_rougeL": 0.7843714735900932, "eval_runtime": 449.4757, "eval_samples_per_second": 2.171, "eval_steps_per_second": 0.136, "step": 2842 }, { "epoch": 2.0056298381421533, "grad_norm": 0.5270591378211975, "learning_rate": 9.393164462159675e-05, "loss": 0.2384, "step": 2850 }, { "epoch": 2.0408163265306123, "grad_norm": 0.4357328712940216, "learning_rate": 9.365085903580982e-05, "loss": 0.2231, "step": 2900 }, { "epoch": 2.0760028149190712, "grad_norm": 0.4469678997993469, "learning_rate": 9.336416294686387e-05, "loss": 0.2251, "step": 2950 }, { "epoch": 2.1111893033075297, "grad_norm": 0.5681746602058411, "learning_rate": 9.307159517457082e-05, "loss": 0.2219, "step": 3000 }, { "epoch": 2.1463757916959887, "grad_norm": 0.5536030530929565, "learning_rate": 9.277319533379234e-05, "loss": 0.2238, "step": 3050 }, { "epoch": 2.1815622800844476, "grad_norm": 0.5085236430168152, "learning_rate": 9.246900382907582e-05, "loss": 0.2226, "step": 3100 }, { "epoch": 2.2167487684729066, "grad_norm": 0.5016751289367676, "learning_rate": 9.215906184918336e-05, "loss": 0.2204, "step": 3150 }, { "epoch": 2.251935256861365, "grad_norm": 0.48685961961746216, "learning_rate": 9.184341136151477e-05, "loss": 0.2208, "step": 3200 }, { "epoch": 2.287121745249824, "grad_norm": 0.4801936745643616, "learning_rate": 9.152209510642486e-05, "loss": 0.2218, "step": 3250 }, { "epoch": 2.322308233638283, "grad_norm": 0.510166347026825, "learning_rate": 9.119515659143633e-05, "loss": 0.2184, "step": 3300 }, { "epoch": 2.3574947220267415, "grad_norm": 0.8376412987709045, "learning_rate": 9.086264008534868e-05, "loss": 0.2205, "step": 3350 }, { "epoch": 2.3926812104152004, "grad_norm": 0.5366727709770203, "learning_rate": 9.0524590612244e-05, "loss": 0.2216, "step": 3400 }, { "epoch": 2.4278676988036594, "grad_norm": 0.44326144456863403, "learning_rate": 9.018105394539047e-05, "loss": 0.2198, "step": 3450 }, { "epoch": 2.4630541871921183, "grad_norm": 0.46824198961257935, "learning_rate": 8.983207660104458e-05, "loss": 0.2193, "step": 3500 }, { "epoch": 2.4982406755805773, "grad_norm": 0.5024743676185608, "learning_rate": 8.94777058321526e-05, "loss": 0.218, "step": 3550 }, { "epoch": 2.533427163969036, "grad_norm": 0.45793282985687256, "learning_rate": 8.911798962195235e-05, "loss": 0.218, "step": 3600 }, { "epoch": 2.5686136523574947, "grad_norm": 0.5126354694366455, "learning_rate": 8.875297667747598e-05, "loss": 0.2152, "step": 3650 }, { "epoch": 2.6038001407459537, "grad_norm": 0.4583749771118164, "learning_rate": 8.8382716422955e-05, "loss": 0.216, "step": 3700 }, { "epoch": 2.638986629134412, "grad_norm": 0.4271000623703003, "learning_rate": 8.800725899312794e-05, "loss": 0.2115, "step": 3750 }, { "epoch": 2.674173117522871, "grad_norm": 0.45106443762779236, "learning_rate": 8.76266552264519e-05, "loss": 0.2155, "step": 3800 }, { "epoch": 2.70935960591133, "grad_norm": 0.4409451186656952, "learning_rate": 8.724095665821885e-05, "loss": 0.2132, "step": 3850 }, { "epoch": 2.744546094299789, "grad_norm": 0.48048603534698486, "learning_rate": 8.685021551357756e-05, "loss": 0.2102, "step": 3900 }, { "epoch": 2.779732582688248, "grad_norm": 0.4103033244609833, "learning_rate": 8.645448470046203e-05, "loss": 0.2106, "step": 3950 }, { "epoch": 2.8149190710767065, "grad_norm": 0.4728895425796509, "learning_rate": 8.605381780242761e-05, "loss": 0.2094, "step": 4000 }, { "epoch": 2.8501055594651654, "grad_norm": 0.4339178800582886, "learning_rate": 8.564826907139551e-05, "loss": 0.2094, "step": 4050 }, { "epoch": 2.8852920478536244, "grad_norm": 0.3856242001056671, "learning_rate": 8.5237893420307e-05, "loss": 0.2094, "step": 4100 }, { "epoch": 2.920478536242083, "grad_norm": 0.41293683648109436, "learning_rate": 8.482274641568777e-05, "loss": 0.2079, "step": 4150 }, { "epoch": 2.955665024630542, "grad_norm": 0.4480830132961273, "learning_rate": 8.440288427012419e-05, "loss": 0.2098, "step": 4200 }, { "epoch": 2.9908515130190008, "grad_norm": 0.4007086455821991, "learning_rate": 8.397836383465177e-05, "loss": 0.2073, "step": 4250 }, { "epoch": 3.0, "eval_loss": 0.1954619288444519, "eval_rouge1": 0.8600097780074395, "eval_rouge2": 0.7422004525971719, "eval_rougeL": 0.792427877174053, "eval_runtime": 452.8255, "eval_samples_per_second": 2.155, "eval_steps_per_second": 0.135, "step": 4263 }, { "epoch": 3.0260380014074597, "grad_norm": 0.42829668521881104, "learning_rate": 8.354924259105735e-05, "loss": 0.1954, "step": 4300 }, { "epoch": 3.061224489795918, "grad_norm": 0.4310859739780426, "learning_rate": 8.311557864409585e-05, "loss": 0.1882, "step": 4350 }, { "epoch": 3.096410978184377, "grad_norm": 0.38604119420051575, "learning_rate": 8.267743071362261e-05, "loss": 0.1877, "step": 4400 }, { "epoch": 3.131597466572836, "grad_norm": 0.4186563193798065, "learning_rate": 8.223485812664254e-05, "loss": 0.1898, "step": 4450 }, { "epoch": 3.166783954961295, "grad_norm": 0.4156329333782196, "learning_rate": 8.178792080927693e-05, "loss": 0.1891, "step": 4500 }, { "epoch": 3.2019704433497536, "grad_norm": 0.39651060104370117, "learning_rate": 8.133667927864925e-05, "loss": 0.1907, "step": 4550 }, { "epoch": 3.2371569317382125, "grad_norm": 0.4282292425632477, "learning_rate": 8.088119463469088e-05, "loss": 0.1885, "step": 4600 }, { "epoch": 3.2723434201266715, "grad_norm": 0.3733578622341156, "learning_rate": 8.042152855186796e-05, "loss": 0.188, "step": 4650 }, { "epoch": 3.3075299085151304, "grad_norm": 0.4039362668991089, "learning_rate": 7.995774327083039e-05, "loss": 0.1858, "step": 4700 }, { "epoch": 3.342716396903589, "grad_norm": 0.4078548848628998, "learning_rate": 7.948990158998416e-05, "loss": 0.1889, "step": 4750 }, { "epoch": 3.377902885292048, "grad_norm": 0.4313489496707916, "learning_rate": 7.901806685698828e-05, "loss": 0.1868, "step": 4800 }, { "epoch": 3.413089373680507, "grad_norm": 0.3866104185581207, "learning_rate": 7.854230296017718e-05, "loss": 0.19, "step": 4850 }, { "epoch": 3.4482758620689653, "grad_norm": 0.3989320695400238, "learning_rate": 7.806267431990994e-05, "loss": 0.1882, "step": 4900 }, { "epoch": 3.4834623504574243, "grad_norm": 0.3907149136066437, "learning_rate": 7.757924587984762e-05, "loss": 0.187, "step": 4950 }, { "epoch": 3.518648838845883, "grad_norm": 0.4228266775608063, "learning_rate": 7.709208309815958e-05, "loss": 0.1875, "step": 5000 }, { "epoch": 3.553835327234342, "grad_norm": 0.6705135107040405, "learning_rate": 7.66012519386601e-05, "loss": 0.1859, "step": 5050 }, { "epoch": 3.589021815622801, "grad_norm": 0.4779379963874817, "learning_rate": 7.610681886187674e-05, "loss": 0.1865, "step": 5100 }, { "epoch": 3.6242083040112596, "grad_norm": 0.45365095138549805, "learning_rate": 7.56088508160512e-05, "loss": 0.1862, "step": 5150 }, { "epoch": 3.6593947923997185, "grad_norm": 0.4076572060585022, "learning_rate": 7.510741522807433e-05, "loss": 0.1889, "step": 5200 }, { "epoch": 3.6945812807881775, "grad_norm": 0.43747252225875854, "learning_rate": 7.46025799943563e-05, "loss": 0.1887, "step": 5250 }, { "epoch": 3.729767769176636, "grad_norm": 0.39369073510169983, "learning_rate": 7.409441347163295e-05, "loss": 0.1843, "step": 5300 }, { "epoch": 3.764954257565095, "grad_norm": 0.4166117310523987, "learning_rate": 7.358298446771027e-05, "loss": 0.1847, "step": 5350 }, { "epoch": 3.800140745953554, "grad_norm": 0.4330393671989441, "learning_rate": 7.306836223214737e-05, "loss": 0.1835, "step": 5400 }, { "epoch": 3.835327234342013, "grad_norm": 0.43853238224983215, "learning_rate": 7.255061644687987e-05, "loss": 0.1834, "step": 5450 }, { "epoch": 3.8705137227304713, "grad_norm": 0.3681392967700958, "learning_rate": 7.202981721678467e-05, "loss": 0.1867, "step": 5500 }, { "epoch": 3.9057002111189303, "grad_norm": 0.3907797634601593, "learning_rate": 7.150603506018746e-05, "loss": 0.1843, "step": 5550 }, { "epoch": 3.9408866995073892, "grad_norm": 0.41560277342796326, "learning_rate": 7.097934089931428e-05, "loss": 0.1835, "step": 5600 }, { "epoch": 3.9760731878958477, "grad_norm": 0.3772493302822113, "learning_rate": 7.044980605068838e-05, "loss": 0.1836, "step": 5650 }, { "epoch": 4.0, "eval_loss": 0.18461786210536957, "eval_rouge1": 0.8616013129223752, "eval_rouge2": 0.7480485417635835, "eval_rougeL": 0.7978102439084865, "eval_runtime": 451.3649, "eval_samples_per_second": 2.162, "eval_steps_per_second": 0.135, "step": 5684 }, { "epoch": 4.011259676284307, "grad_norm": 0.3513008952140808, "learning_rate": 6.991750221547356e-05, "loss": 0.177, "step": 5700 }, { "epoch": 4.046446164672766, "grad_norm": 0.4176090955734253, "learning_rate": 6.938250146976566e-05, "loss": 0.164, "step": 5750 }, { "epoch": 4.081632653061225, "grad_norm": 0.3744259178638458, "learning_rate": 6.884487625483319e-05, "loss": 0.1633, "step": 5800 }, { "epoch": 4.1168191414496835, "grad_norm": 0.4168541431427002, "learning_rate": 6.830469936730829e-05, "loss": 0.1653, "step": 5850 }, { "epoch": 4.1520056298381425, "grad_norm": 0.4053267240524292, "learning_rate": 6.776204394933005e-05, "loss": 0.1663, "step": 5900 }, { "epoch": 4.187192118226601, "grad_norm": 0.4216878116130829, "learning_rate": 6.721698347864054e-05, "loss": 0.1664, "step": 5950 }, { "epoch": 4.2223786066150595, "grad_norm": 0.3769453465938568, "learning_rate": 6.666959175863571e-05, "loss": 0.1653, "step": 6000 }, { "epoch": 4.257565095003518, "grad_norm": 0.36543887853622437, "learning_rate": 6.611994290837213e-05, "loss": 0.165, "step": 6050 }, { "epoch": 4.292751583391977, "grad_norm": 0.3971823453903198, "learning_rate": 6.5568111352531e-05, "loss": 0.165, "step": 6100 }, { "epoch": 4.327938071780436, "grad_norm": 0.35752159357070923, "learning_rate": 6.501417181134062e-05, "loss": 0.1653, "step": 6150 }, { "epoch": 4.363124560168895, "grad_norm": 0.3872394859790802, "learning_rate": 6.445819929045909e-05, "loss": 0.1702, "step": 6200 }, { "epoch": 4.398311048557354, "grad_norm": 0.3669523000717163, "learning_rate": 6.390026907081819e-05, "loss": 0.167, "step": 6250 }, { "epoch": 4.433497536945813, "grad_norm": 0.41579532623291016, "learning_rate": 6.334045669843001e-05, "loss": 0.1659, "step": 6300 }, { "epoch": 4.468684025334271, "grad_norm": 0.40912964940071106, "learning_rate": 6.277883797415778e-05, "loss": 0.165, "step": 6350 }, { "epoch": 4.50387051372273, "grad_norm": 0.3610992729663849, "learning_rate": 6.221548894345213e-05, "loss": 0.1643, "step": 6400 }, { "epoch": 4.539057002111189, "grad_norm": 0.36075299978256226, "learning_rate": 6.165048588605408e-05, "loss": 0.1654, "step": 6450 }, { "epoch": 4.574243490499648, "grad_norm": 0.3579295575618744, "learning_rate": 6.108390530566672e-05, "loss": 0.1653, "step": 6500 }, { "epoch": 4.609429978888107, "grad_norm": 0.3835432827472687, "learning_rate": 6.0515823919596014e-05, "loss": 0.1638, "step": 6550 }, { "epoch": 4.644616467276566, "grad_norm": 0.4055614173412323, "learning_rate": 5.994631864836314e-05, "loss": 0.1657, "step": 6600 }, { "epoch": 4.679802955665025, "grad_norm": 0.38296735286712646, "learning_rate": 5.937546660528911e-05, "loss": 0.1633, "step": 6650 }, { "epoch": 4.714989444053483, "grad_norm": 0.39152032136917114, "learning_rate": 5.880334508605327e-05, "loss": 0.1656, "step": 6700 }, { "epoch": 4.750175932441942, "grad_norm": 0.34158411622047424, "learning_rate": 5.823003155822722e-05, "loss": 0.1645, "step": 6750 }, { "epoch": 4.785362420830401, "grad_norm": 0.37078145146369934, "learning_rate": 5.765560365078529e-05, "loss": 0.1636, "step": 6800 }, { "epoch": 4.82054890921886, "grad_norm": 0.39319515228271484, "learning_rate": 5.708013914359344e-05, "loss": 0.1644, "step": 6850 }, { "epoch": 4.855735397607319, "grad_norm": 0.40831372141838074, "learning_rate": 5.650371595687728e-05, "loss": 0.1656, "step": 6900 }, { "epoch": 4.890921885995778, "grad_norm": 0.36898377537727356, "learning_rate": 5.5926412140671634e-05, "loss": 0.1636, "step": 6950 }, { "epoch": 4.926108374384237, "grad_norm": 0.34954026341438293, "learning_rate": 5.5348305864252084e-05, "loss": 0.1631, "step": 7000 }, { "epoch": 4.961294862772696, "grad_norm": 0.34124264121055603, "learning_rate": 5.476947540555056e-05, "loss": 0.1627, "step": 7050 }, { "epoch": 4.9964813511611545, "grad_norm": 0.3581432104110718, "learning_rate": 5.4189999140556246e-05, "loss": 0.1624, "step": 7100 }, { "epoch": 5.0, "eval_loss": 0.17616315186023712, "eval_rouge1": 0.8660875697572651, "eval_rouge2": 0.7542777496123054, "eval_rougeL": 0.8047666373953144, "eval_runtime": 451.8214, "eval_samples_per_second": 2.16, "eval_steps_per_second": 0.135, "step": 7105 }, { "epoch": 5.031667839549613, "grad_norm": 0.33652958273887634, "learning_rate": 5.360995553270308e-05, "loss": 0.1475, "step": 7150 }, { "epoch": 5.066854327938072, "grad_norm": 0.36935561895370483, "learning_rate": 5.302942312224554e-05, "loss": 0.1451, "step": 7200 }, { "epoch": 5.1020408163265305, "grad_norm": 0.3739613890647888, "learning_rate": 5.244848051562393e-05, "loss": 0.1471, "step": 7250 }, { "epoch": 5.1372273047149895, "grad_norm": 0.3686353862285614, "learning_rate": 5.1867206374820834e-05, "loss": 0.146, "step": 7300 }, { "epoch": 5.172413793103448, "grad_norm": 0.37493589520454407, "learning_rate": 5.128567940670984e-05, "loss": 0.1463, "step": 7350 }, { "epoch": 5.207600281491907, "grad_norm": 0.3784906268119812, "learning_rate": 5.070397835239844e-05, "loss": 0.1451, "step": 7400 }, { "epoch": 5.242786769880366, "grad_norm": 0.3828112483024597, "learning_rate": 5.012218197656602e-05, "loss": 0.1467, "step": 7450 }, { "epoch": 5.277973258268824, "grad_norm": 0.36482906341552734, "learning_rate": 4.9540369056798986e-05, "loss": 0.1481, "step": 7500 }, { "epoch": 5.313159746657283, "grad_norm": 0.3216530382633209, "learning_rate": 4.895861837292379e-05, "loss": 0.1482, "step": 7550 }, { "epoch": 5.348346235045742, "grad_norm": 0.3856315314769745, "learning_rate": 4.837700869633992e-05, "loss": 0.1474, "step": 7600 }, { "epoch": 5.383532723434201, "grad_norm": 0.3630039095878601, "learning_rate": 4.779561877935389e-05, "loss": 0.1465, "step": 7650 }, { "epoch": 5.41871921182266, "grad_norm": 0.3713323175907135, "learning_rate": 4.72145273445159e-05, "loss": 0.1479, "step": 7700 }, { "epoch": 5.453905700211119, "grad_norm": 0.34570086002349854, "learning_rate": 4.663381307396043e-05, "loss": 0.1481, "step": 7750 }, { "epoch": 5.489092188599578, "grad_norm": 0.3575226068496704, "learning_rate": 4.60535545987524e-05, "loss": 0.1469, "step": 7800 }, { "epoch": 5.524278676988036, "grad_norm": 0.4092855155467987, "learning_rate": 4.5473830488240206e-05, "loss": 0.1473, "step": 7850 }, { "epoch": 5.559465165376495, "grad_norm": 0.3667682707309723, "learning_rate": 4.4894719239417055e-05, "loss": 0.1463, "step": 7900 }, { "epoch": 5.594651653764954, "grad_norm": 0.37385767698287964, "learning_rate": 4.431629926629225e-05, "loss": 0.1475, "step": 7950 }, { "epoch": 5.629838142153413, "grad_norm": 0.34562572836875916, "learning_rate": 4.373864888927354e-05, "loss": 0.1474, "step": 8000 }, { "epoch": 5.665024630541872, "grad_norm": 0.3551468849182129, "learning_rate": 4.316184632456224e-05, "loss": 0.1471, "step": 8050 }, { "epoch": 5.700211118930331, "grad_norm": 0.4839322566986084, "learning_rate": 4.258596967356245e-05, "loss": 0.148, "step": 8100 }, { "epoch": 5.73539760731879, "grad_norm": 0.5742191672325134, "learning_rate": 4.2011096912305825e-05, "loss": 0.147, "step": 8150 }, { "epoch": 5.770584095707249, "grad_norm": 0.3712711036205292, "learning_rate": 4.1437305880893216e-05, "loss": 0.147, "step": 8200 }, { "epoch": 5.805770584095708, "grad_norm": 0.36071163415908813, "learning_rate": 4.086467427295496e-05, "loss": 0.1475, "step": 8250 }, { "epoch": 5.840957072484166, "grad_norm": 0.34014493227005005, "learning_rate": 4.029327962513069e-05, "loss": 0.1476, "step": 8300 }, { "epoch": 5.876143560872625, "grad_norm": 0.35158300399780273, "learning_rate": 3.972319930657065e-05, "loss": 0.1472, "step": 8350 }, { "epoch": 5.911330049261084, "grad_norm": 0.3580736219882965, "learning_rate": 3.9154510508459573e-05, "loss": 0.1467, "step": 8400 }, { "epoch": 5.946516537649543, "grad_norm": 0.35428690910339355, "learning_rate": 3.858729023356471e-05, "loss": 0.1475, "step": 8450 }, { "epoch": 5.9817030260380015, "grad_norm": 0.37157419323921204, "learning_rate": 3.8021615285809275e-05, "loss": 0.1474, "step": 8500 }, { "epoch": 6.0, "eval_loss": 0.17279887199401855, "eval_rouge1": 0.8664537877020483, "eval_rouge2": 0.7545350085119784, "eval_rougeL": 0.8043032064197381, "eval_runtime": 450.3696, "eval_samples_per_second": 2.167, "eval_steps_per_second": 0.135, "step": 8526 }, { "epoch": 6.0168895144264605, "grad_norm": 0.33888348937034607, "learning_rate": 3.7457562259872955e-05, "loss": 0.1389, "step": 8550 }, { "epoch": 6.052076002814919, "grad_norm": 0.3285595774650574, "learning_rate": 3.689520753082065e-05, "loss": 0.131, "step": 8600 }, { "epoch": 6.0872624912033775, "grad_norm": 0.35263049602508545, "learning_rate": 3.633462724376098e-05, "loss": 0.1324, "step": 8650 }, { "epoch": 6.122448979591836, "grad_norm": 0.3409713804721832, "learning_rate": 3.577589730353593e-05, "loss": 0.1314, "step": 8700 }, { "epoch": 6.157635467980295, "grad_norm": 0.3523200452327728, "learning_rate": 3.5219093364443024e-05, "loss": 0.1331, "step": 8750 }, { "epoch": 6.192821956368754, "grad_norm": 0.3560802936553955, "learning_rate": 3.4664290819991426e-05, "loss": 0.1302, "step": 8800 }, { "epoch": 6.228008444757213, "grad_norm": 0.35946422815322876, "learning_rate": 3.411156479269336e-05, "loss": 0.1325, "step": 8850 }, { "epoch": 6.263194933145672, "grad_norm": 0.37413743138313293, "learning_rate": 3.356099012389223e-05, "loss": 0.1324, "step": 8900 }, { "epoch": 6.298381421534131, "grad_norm": 0.36641544103622437, "learning_rate": 3.301264136362877e-05, "loss": 0.1307, "step": 8950 }, { "epoch": 6.33356790992259, "grad_norm": 0.32829925417900085, "learning_rate": 3.246659276054668e-05, "loss": 0.1325, "step": 9000 }, { "epoch": 6.368754398311048, "grad_norm": 0.3732859194278717, "learning_rate": 3.1922918251839065e-05, "loss": 0.1319, "step": 9050 }, { "epoch": 6.403940886699507, "grad_norm": 0.3437540531158447, "learning_rate": 3.138169145323701e-05, "loss": 0.1336, "step": 9100 }, { "epoch": 6.439127375087966, "grad_norm": 0.3742469847202301, "learning_rate": 3.084298564904177e-05, "loss": 0.1326, "step": 9150 }, { "epoch": 6.474313863476425, "grad_norm": 0.3869800269603729, "learning_rate": 3.0306873782201695e-05, "loss": 0.1325, "step": 9200 }, { "epoch": 6.509500351864884, "grad_norm": 0.3684918284416199, "learning_rate": 2.977342844443558e-05, "loss": 0.1336, "step": 9250 }, { "epoch": 6.544686840253343, "grad_norm": 0.348650187253952, "learning_rate": 2.924272186640333e-05, "loss": 0.1315, "step": 9300 }, { "epoch": 6.579873328641802, "grad_norm": 0.363425076007843, "learning_rate": 2.871482590792571e-05, "loss": 0.1324, "step": 9350 }, { "epoch": 6.615059817030261, "grad_norm": 0.3482951819896698, "learning_rate": 2.818981204825416e-05, "loss": 0.1332, "step": 9400 }, { "epoch": 6.650246305418719, "grad_norm": 0.350477397441864, "learning_rate": 2.7667751376392316e-05, "loss": 0.1313, "step": 9450 }, { "epoch": 6.685432793807178, "grad_norm": 0.3624843955039978, "learning_rate": 2.7148714581470224e-05, "loss": 0.1324, "step": 9500 }, { "epoch": 6.720619282195637, "grad_norm": 0.3529001772403717, "learning_rate": 2.663277194317272e-05, "loss": 0.1334, "step": 9550 }, { "epoch": 6.755805770584096, "grad_norm": 0.36429792642593384, "learning_rate": 2.611999332222331e-05, "loss": 0.1313, "step": 9600 }, { "epoch": 6.790992258972555, "grad_norm": 0.35888415575027466, "learning_rate": 2.5610448150924702e-05, "loss": 0.13, "step": 9650 }, { "epoch": 6.826178747361014, "grad_norm": 0.34723177552223206, "learning_rate": 2.5104205423757447e-05, "loss": 0.1316, "step": 9700 }, { "epoch": 6.8613652357494725, "grad_norm": 0.3660404682159424, "learning_rate": 2.4601333688037787e-05, "loss": 0.1318, "step": 9750 }, { "epoch": 6.896551724137931, "grad_norm": 0.3829537034034729, "learning_rate": 2.4101901034636048e-05, "loss": 0.1331, "step": 9800 }, { "epoch": 6.9317382125263896, "grad_norm": 0.3708410859107971, "learning_rate": 2.3605975088756892e-05, "loss": 0.1316, "step": 9850 }, { "epoch": 6.9669247009148485, "grad_norm": 0.3694988191127777, "learning_rate": 2.3113623000782565e-05, "loss": 0.1311, "step": 9900 }, { "epoch": 7.0, "eval_loss": 0.17041701078414917, "eval_rouge1": 0.8702112863863708, "eval_rouge2": 0.7602958962843911, "eval_rougeL": 0.8090768364435674, "eval_runtime": 450.9439, "eval_samples_per_second": 2.164, "eval_steps_per_second": 0.135, "step": 9947 }, { "epoch": 7.0021111893033074, "grad_norm": 0.3329477906227112, "learning_rate": 2.2624911437180478e-05, "loss": 0.132, "step": 9950 }, { "epoch": 7.037297677691766, "grad_norm": 0.3364117443561554, "learning_rate": 2.2139906571476295e-05, "loss": 0.1206, "step": 10000 }, { "epoch": 7.072484166080225, "grad_norm": 0.3616856336593628, "learning_rate": 2.1658674075293757e-05, "loss": 0.1196, "step": 10050 }, { "epoch": 7.107670654468684, "grad_norm": 0.336531400680542, "learning_rate": 2.1181279109462497e-05, "loss": 0.121, "step": 10100 }, { "epoch": 7.142857142857143, "grad_norm": 0.3527058959007263, "learning_rate": 2.0707786315194977e-05, "loss": 0.1203, "step": 10150 }, { "epoch": 7.178043631245601, "grad_norm": 0.3386009633541107, "learning_rate": 2.023825980533389e-05, "loss": 0.1205, "step": 10200 }, { "epoch": 7.21323011963406, "grad_norm": 0.3431272804737091, "learning_rate": 1.9772763155670836e-05, "loss": 0.1203, "step": 10250 }, { "epoch": 7.248416608022519, "grad_norm": 0.35417425632476807, "learning_rate": 1.931135939633809e-05, "loss": 0.1206, "step": 10300 }, { "epoch": 7.283603096410978, "grad_norm": 0.3702761232852936, "learning_rate": 1.885411100327399e-05, "loss": 0.1219, "step": 10350 }, { "epoch": 7.318789584799437, "grad_norm": 0.3507060706615448, "learning_rate": 1.8401079889763433e-05, "loss": 0.1205, "step": 10400 }, { "epoch": 7.353976073187896, "grad_norm": 0.3541838228702545, "learning_rate": 1.7952327398054587e-05, "loss": 0.1209, "step": 10450 }, { "epoch": 7.389162561576355, "grad_norm": 0.3458681106567383, "learning_rate": 1.7507914291052878e-05, "loss": 0.1207, "step": 10500 }, { "epoch": 7.424349049964814, "grad_norm": 0.33733808994293213, "learning_rate": 1.7067900744093483e-05, "loss": 0.1196, "step": 10550 }, { "epoch": 7.459535538353272, "grad_norm": 0.37008148431777954, "learning_rate": 1.6632346336793326e-05, "loss": 0.1209, "step": 10600 }, { "epoch": 7.494722026741731, "grad_norm": 0.3410815894603729, "learning_rate": 1.6201310044983725e-05, "loss": 0.1206, "step": 10650 }, { "epoch": 7.52990851513019, "grad_norm": 0.3718872368335724, "learning_rate": 1.5774850232724884e-05, "loss": 0.119, "step": 10700 }, { "epoch": 7.565095003518649, "grad_norm": 0.36411306262016296, "learning_rate": 1.535302464440314e-05, "loss": 0.1202, "step": 10750 }, { "epoch": 7.600281491907108, "grad_norm": 0.33343836665153503, "learning_rate": 1.4935890396912105e-05, "loss": 0.1207, "step": 10800 }, { "epoch": 7.635467980295567, "grad_norm": 0.39444583654403687, "learning_rate": 1.4523503971918862e-05, "loss": 0.1194, "step": 10850 }, { "epoch": 7.670654468684026, "grad_norm": 0.34955981373786926, "learning_rate": 1.4115921208216081e-05, "loss": 0.1205, "step": 10900 }, { "epoch": 7.705840957072484, "grad_norm": 0.3438533544540405, "learning_rate": 1.3713197294161234e-05, "loss": 0.12, "step": 10950 }, { "epoch": 7.741027445460943, "grad_norm": 0.3748343288898468, "learning_rate": 1.3315386760203862e-05, "loss": 0.1206, "step": 11000 }, { "epoch": 7.776213933849402, "grad_norm": 0.3981345593929291, "learning_rate": 1.2922543471501947e-05, "loss": 0.1191, "step": 11050 }, { "epoch": 7.811400422237861, "grad_norm": 0.34857961535453796, "learning_rate": 1.2534720620628327e-05, "loss": 0.1194, "step": 11100 }, { "epoch": 7.8465869106263195, "grad_norm": 0.34216824173927307, "learning_rate": 1.2151970720368216e-05, "loss": 0.1206, "step": 11150 }, { "epoch": 7.8817733990147785, "grad_norm": 0.357122540473938, "learning_rate": 1.1774345596608776e-05, "loss": 0.1211, "step": 11200 }, { "epoch": 7.916959887403237, "grad_norm": 0.3293827176094055, "learning_rate": 1.1401896381321664e-05, "loss": 0.1181, "step": 11250 }, { "epoch": 7.952146375791696, "grad_norm": 0.34047678112983704, "learning_rate": 1.1034673505639553e-05, "loss": 0.1208, "step": 11300 }, { "epoch": 7.987332864180155, "grad_norm": 0.3612457513809204, "learning_rate": 1.0672726693027585e-05, "loss": 0.119, "step": 11350 }, { "epoch": 8.0, "eval_loss": 0.17086505889892578, "eval_rouge1": 0.8709219864408166, "eval_rouge2": 0.7611385118494474, "eval_rougeL": 0.8088192845230161, "eval_runtime": 451.1411, "eval_samples_per_second": 2.163, "eval_steps_per_second": 0.135, "step": 11368 }, { "epoch": 8.022519352568613, "grad_norm": 0.4030775725841522, "learning_rate": 1.0316104952550525e-05, "loss": 0.1156, "step": 11400 }, { "epoch": 8.057705840957073, "grad_norm": 0.33062559366226196, "learning_rate": 9.964856572236853e-06, "loss": 0.1114, "step": 11450 }, { "epoch": 8.092892329345531, "grad_norm": 0.3237990438938141, "learning_rate": 9.619029112540318e-06, "loss": 0.1123, "step": 11500 }, { "epoch": 8.12807881773399, "grad_norm": 0.3725147843360901, "learning_rate": 9.278669399900031e-06, "loss": 0.1113, "step": 11550 }, { "epoch": 8.16326530612245, "grad_norm": 0.34692293405532837, "learning_rate": 8.943823520399996e-06, "loss": 0.1126, "step": 11600 }, { "epoch": 8.198451794510907, "grad_norm": 0.36533844470977783, "learning_rate": 8.614536813528885e-06, "loss": 0.1121, "step": 11650 }, { "epoch": 8.233638282899367, "grad_norm": 0.3476887047290802, "learning_rate": 8.290853866040865e-06, "loss": 0.1124, "step": 11700 }, { "epoch": 8.268824771287825, "grad_norm": 0.35408487915992737, "learning_rate": 7.972818505918389e-06, "loss": 0.1127, "step": 11750 }, { "epoch": 8.304011259676285, "grad_norm": 0.3611229658126831, "learning_rate": 7.660473796437701e-06, "loss": 0.1142, "step": 11800 }, { "epoch": 8.339197748064743, "grad_norm": 0.32982322573661804, "learning_rate": 7.353862030337871e-06, "loss": 0.1124, "step": 11850 }, { "epoch": 8.374384236453203, "grad_norm": 0.3514786958694458, "learning_rate": 7.053024724094237e-06, "loss": 0.1107, "step": 11900 }, { "epoch": 8.409570724841661, "grad_norm": 0.34895211458206177, "learning_rate": 6.758002612296877e-06, "loss": 0.1133, "step": 11950 }, { "epoch": 8.444757213230119, "grad_norm": 0.3492163121700287, "learning_rate": 6.468835642134941e-06, "loss": 0.1124, "step": 12000 }, { "epoch": 8.479943701618579, "grad_norm": 0.33811211585998535, "learning_rate": 6.185562967987729e-06, "loss": 0.1123, "step": 12050 }, { "epoch": 8.515130190007037, "grad_norm": 0.3364134132862091, "learning_rate": 5.90822294612296e-06, "loss": 0.1118, "step": 12100 }, { "epoch": 8.550316678395497, "grad_norm": 0.3297932744026184, "learning_rate": 5.6368531295032e-06, "loss": 0.1117, "step": 12150 }, { "epoch": 8.585503166783955, "grad_norm": 0.3443482220172882, "learning_rate": 5.371490262701023e-06, "loss": 0.1129, "step": 12200 }, { "epoch": 8.620689655172415, "grad_norm": 0.32068300247192383, "learning_rate": 5.112170276923706e-06, "loss": 0.1122, "step": 12250 }, { "epoch": 8.655876143560873, "grad_norm": 0.340096652507782, "learning_rate": 4.8589282851479015e-06, "loss": 0.1123, "step": 12300 }, { "epoch": 8.69106263194933, "grad_norm": 0.34847521781921387, "learning_rate": 4.611798577365284e-06, "loss": 0.1131, "step": 12350 }, { "epoch": 8.72624912033779, "grad_norm": 0.34428083896636963, "learning_rate": 4.370814615939489e-06, "loss": 0.1124, "step": 12400 }, { "epoch": 8.761435608726249, "grad_norm": 0.34424662590026855, "learning_rate": 4.136009031075217e-06, "loss": 0.1131, "step": 12450 }, { "epoch": 8.796622097114708, "grad_norm": 0.33876335620880127, "learning_rate": 3.907413616399935e-06, "loss": 0.1109, "step": 12500 }, { "epoch": 8.831808585503166, "grad_norm": 0.3361798822879791, "learning_rate": 3.685059324658924e-06, "loss": 0.1121, "step": 12550 }, { "epoch": 8.866995073891626, "grad_norm": 0.34564682841300964, "learning_rate": 3.46897626352411e-06, "loss": 0.1115, "step": 12600 }, { "epoch": 8.902181562280084, "grad_norm": 0.3171030879020691, "learning_rate": 3.2591936915174094e-06, "loss": 0.1123, "step": 12650 }, { "epoch": 8.937368050668542, "grad_norm": 0.3457452058792114, "learning_rate": 3.0557400140489745e-06, "loss": 0.112, "step": 12700 }, { "epoch": 8.972554539057002, "grad_norm": 0.34865686297416687, "learning_rate": 2.8586427795710026e-06, "loss": 0.113, "step": 12750 }, { "epoch": 9.0, "eval_loss": 0.17192131280899048, "eval_rouge1": 0.8712237136076882, "eval_rouge2": 0.7617908282670558, "eval_rougeL": 0.8093251316860329, "eval_runtime": 454.6716, "eval_samples_per_second": 2.147, "eval_steps_per_second": 0.134, "step": 12789 }, { "epoch": 9.00774102744546, "grad_norm": 0.3425465226173401, "learning_rate": 2.6679286758475584e-06, "loss": 0.1121, "step": 12800 }, { "epoch": 9.04292751583392, "grad_norm": 0.34095096588134766, "learning_rate": 2.4836235263409335e-06, "loss": 0.1092, "step": 12850 }, { "epoch": 9.078114004222378, "grad_norm": 0.3610168397426605, "learning_rate": 2.305752286715057e-06, "loss": 0.1079, "step": 12900 }, { "epoch": 9.113300492610838, "grad_norm": 0.35733482241630554, "learning_rate": 2.134339041456396e-06, "loss": 0.11, "step": 12950 }, { "epoch": 9.148486980999296, "grad_norm": 0.3327946066856384, "learning_rate": 1.9694070006128217e-06, "loss": 0.1084, "step": 13000 }, { "epoch": 9.183673469387756, "grad_norm": 0.33050432801246643, "learning_rate": 1.810978496650867e-06, "loss": 0.1072, "step": 13050 }, { "epoch": 9.218859957776214, "grad_norm": 0.3362489938735962, "learning_rate": 1.6590749814318186e-06, "loss": 0.1095, "step": 13100 }, { "epoch": 9.254046446164672, "grad_norm": 0.333631306886673, "learning_rate": 1.513717023307043e-06, "loss": 0.1094, "step": 13150 }, { "epoch": 9.289232934553132, "grad_norm": 0.33265042304992676, "learning_rate": 1.3749243043329807e-06, "loss": 0.1082, "step": 13200 }, { "epoch": 9.32441942294159, "grad_norm": 0.33508723974227905, "learning_rate": 1.2427156176060717e-06, "loss": 0.1094, "step": 13250 }, { "epoch": 9.35960591133005, "grad_norm": 0.3543477952480316, "learning_rate": 1.117108864718125e-06, "loss": 0.1081, "step": 13300 }, { "epoch": 9.394792399718508, "grad_norm": 0.33582067489624023, "learning_rate": 9.981210533323737e-07, "loss": 0.1086, "step": 13350 }, { "epoch": 9.429978888106968, "grad_norm": 0.33258703351020813, "learning_rate": 8.857682948805235e-07, "loss": 0.1089, "step": 13400 }, { "epoch": 9.465165376495426, "grad_norm": 0.3218045234680176, "learning_rate": 7.80065802381269e-07, "loss": 0.1074, "step": 13450 }, { "epoch": 9.500351864883884, "grad_norm": 0.3371478319168091, "learning_rate": 6.810278883803201e-07, "loss": 0.1079, "step": 13500 }, { "epoch": 9.535538353272344, "grad_norm": 0.3434385657310486, "learning_rate": 5.886679630124791e-07, "loss": 0.1079, "step": 13550 }, { "epoch": 9.570724841660802, "grad_norm": 0.3347020745277405, "learning_rate": 5.029985321858222e-07, "loss": 0.1098, "step": 13600 }, { "epoch": 9.605911330049262, "grad_norm": 0.3305223286151886, "learning_rate": 4.2403119588835806e-07, "loss": 0.1079, "step": 13650 }, { "epoch": 9.64109781843772, "grad_norm": 0.3528411090373993, "learning_rate": 3.5177664661735774e-07, "loss": 0.1089, "step": 13700 }, { "epoch": 9.67628430682618, "grad_norm": 0.33300718665122986, "learning_rate": 2.8624466793150674e-07, "loss": 0.1071, "step": 13750 }, { "epoch": 9.711470795214638, "grad_norm": 0.36753568053245544, "learning_rate": 2.2744413312622558e-07, "loss": 0.1095, "step": 13800 }, { "epoch": 9.746657283603096, "grad_norm": 0.3197505474090576, "learning_rate": 1.7538300403214801e-07, "loss": 0.1083, "step": 13850 }, { "epoch": 9.781843771991555, "grad_norm": 0.3311461806297302, "learning_rate": 1.300683299370664e-07, "loss": 0.1088, "step": 13900 }, { "epoch": 9.817030260380013, "grad_norm": 0.33025187253952026, "learning_rate": 9.150624663143981e-08, "loss": 0.1078, "step": 13950 }, { "epoch": 9.852216748768473, "grad_norm": 0.32541847229003906, "learning_rate": 5.970197557758073e-08, "loss": 0.1073, "step": 14000 }, { "epoch": 9.887403237156931, "grad_norm": 0.3378959596157074, "learning_rate": 3.465982320264294e-08, "loss": 0.1079, "step": 14050 }, { "epoch": 9.922589725545391, "grad_norm": 0.3336648643016815, "learning_rate": 1.6383180315504566e-08, "loss": 0.1098, "step": 14100 }, { "epoch": 9.95777621393385, "grad_norm": 0.33483999967575073, "learning_rate": 4.874521647668662e-09, "loss": 0.1093, "step": 14150 }, { "epoch": 9.992962702322309, "grad_norm": 0.3449588716030121, "learning_rate": 1.35405518131293e-10, "loss": 0.1081, "step": 14200 }, { "epoch": 10.0, "eval_loss": 0.17240069806575775, "eval_rouge1": 0.8711153309836848, "eval_rouge2": 0.7624256388120317, "eval_rougeL": 0.8109884566827475, "eval_runtime": 454.0512, "eval_samples_per_second": 2.15, "eval_steps_per_second": 0.134, "step": 14210 }, { "epoch": 10.0, "step": 14210, "total_flos": 8.063560184500127e+17, "train_loss": 0.30949260932639144, "train_runtime": 33830.9579, "train_samples_per_second": 53.76, "train_steps_per_second": 0.42 } ], "logging_steps": 50, "max_steps": 14210, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.063560184500127e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }