{ "best_metric": 37.22662494472977, "best_model_checkpoint": "./whisper-tiny-sg\\checkpoint-3000", "epoch": 29.953917050691246, "eval_steps": 1000, "global_step": 26000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02880184331797235, "grad_norm": 6.292309284210205, "learning_rate": 5.000000000000001e-07, "loss": 0.2891, "step": 25 }, { "epoch": 0.0576036866359447, "grad_norm": 5.9810028076171875, "learning_rate": 1.0000000000000002e-06, "loss": 0.2993, "step": 50 }, { "epoch": 0.08640552995391705, "grad_norm": 5.614720821380615, "learning_rate": 1.5e-06, "loss": 0.2736, "step": 75 }, { "epoch": 0.1152073732718894, "grad_norm": 5.561768054962158, "learning_rate": 2.0000000000000003e-06, "loss": 0.2881, "step": 100 }, { "epoch": 0.14400921658986174, "grad_norm": 5.616841793060303, "learning_rate": 2.5e-06, "loss": 0.2926, "step": 125 }, { "epoch": 0.1728110599078341, "grad_norm": 5.0698370933532715, "learning_rate": 3e-06, "loss": 0.2991, "step": 150 }, { "epoch": 0.20161290322580644, "grad_norm": 5.71262264251709, "learning_rate": 3.5e-06, "loss": 0.2851, "step": 175 }, { "epoch": 0.2304147465437788, "grad_norm": 5.8447418212890625, "learning_rate": 4.000000000000001e-06, "loss": 0.2921, "step": 200 }, { "epoch": 0.25921658986175117, "grad_norm": 6.156001567840576, "learning_rate": 4.5e-06, "loss": 0.2781, "step": 225 }, { "epoch": 0.2880184331797235, "grad_norm": 5.206575393676758, "learning_rate": 5e-06, "loss": 0.2926, "step": 250 }, { "epoch": 0.31682027649769584, "grad_norm": 6.387332916259766, "learning_rate": 5.500000000000001e-06, "loss": 0.3049, "step": 275 }, { "epoch": 0.3456221198156682, "grad_norm": 6.166511058807373, "learning_rate": 6e-06, "loss": 0.2869, "step": 300 }, { "epoch": 0.37442396313364057, "grad_norm": 5.7049560546875, "learning_rate": 6.5000000000000004e-06, "loss": 0.2935, "step": 325 }, { "epoch": 0.4032258064516129, "grad_norm": 6.147988796234131, "learning_rate": 7e-06, "loss": 0.2933, "step": 350 }, { "epoch": 0.43202764976958524, "grad_norm": 5.823277950286865, "learning_rate": 7.500000000000001e-06, "loss": 0.2878, "step": 375 }, { "epoch": 0.4608294930875576, "grad_norm": 6.57545804977417, "learning_rate": 8.000000000000001e-06, "loss": 0.2913, "step": 400 }, { "epoch": 0.48963133640552997, "grad_norm": 6.163663864135742, "learning_rate": 8.5e-06, "loss": 0.2918, "step": 425 }, { "epoch": 0.5184331797235023, "grad_norm": 7.054196834564209, "learning_rate": 9e-06, "loss": 0.2947, "step": 450 }, { "epoch": 0.5472350230414746, "grad_norm": 6.1844964027404785, "learning_rate": 9.5e-06, "loss": 0.2947, "step": 475 }, { "epoch": 0.576036866359447, "grad_norm": 6.3493733406066895, "learning_rate": 1e-05, "loss": 0.2916, "step": 500 }, { "epoch": 0.6048387096774194, "grad_norm": 6.614502429962158, "learning_rate": 9.99748743718593e-06, "loss": 0.3037, "step": 525 }, { "epoch": 0.6336405529953917, "grad_norm": 5.253581523895264, "learning_rate": 9.99497487437186e-06, "loss": 0.2788, "step": 550 }, { "epoch": 0.6624423963133641, "grad_norm": 5.883352756500244, "learning_rate": 9.99246231155779e-06, "loss": 0.2835, "step": 575 }, { "epoch": 0.6912442396313364, "grad_norm": 6.427688121795654, "learning_rate": 9.98994974874372e-06, "loss": 0.2799, "step": 600 }, { "epoch": 0.7200460829493087, "grad_norm": 5.49296236038208, "learning_rate": 9.987437185929649e-06, "loss": 0.29, "step": 625 }, { "epoch": 0.7488479262672811, "grad_norm": 6.0453386306762695, "learning_rate": 9.984924623115578e-06, "loss": 0.2803, "step": 650 }, { "epoch": 0.7776497695852534, "grad_norm": 6.909397602081299, "learning_rate": 9.982412060301508e-06, "loss": 0.2923, "step": 675 }, { "epoch": 0.8064516129032258, "grad_norm": 6.7183003425598145, "learning_rate": 9.979899497487437e-06, "loss": 0.2965, "step": 700 }, { "epoch": 0.8352534562211982, "grad_norm": 5.8181843757629395, "learning_rate": 9.977386934673368e-06, "loss": 0.2976, "step": 725 }, { "epoch": 0.8640552995391705, "grad_norm": 6.078603267669678, "learning_rate": 9.974874371859297e-06, "loss": 0.2965, "step": 750 }, { "epoch": 0.8928571428571429, "grad_norm": 6.3605637550354, "learning_rate": 9.972361809045227e-06, "loss": 0.315, "step": 775 }, { "epoch": 0.9216589861751152, "grad_norm": 6.193057060241699, "learning_rate": 9.969849246231156e-06, "loss": 0.3167, "step": 800 }, { "epoch": 0.9504608294930875, "grad_norm": 5.88997220993042, "learning_rate": 9.967336683417085e-06, "loss": 0.2978, "step": 825 }, { "epoch": 0.9792626728110599, "grad_norm": 5.3312835693359375, "learning_rate": 9.964824120603016e-06, "loss": 0.2867, "step": 850 }, { "epoch": 1.0080645161290323, "grad_norm": 6.321709156036377, "learning_rate": 9.962311557788946e-06, "loss": 0.267, "step": 875 }, { "epoch": 1.0368663594470047, "grad_norm": 5.386514663696289, "learning_rate": 9.959798994974875e-06, "loss": 0.2513, "step": 900 }, { "epoch": 1.0656682027649769, "grad_norm": 5.482215404510498, "learning_rate": 9.957286432160806e-06, "loss": 0.2575, "step": 925 }, { "epoch": 1.0944700460829493, "grad_norm": 5.6295037269592285, "learning_rate": 9.954773869346734e-06, "loss": 0.2577, "step": 950 }, { "epoch": 1.1232718894009217, "grad_norm": 6.107315540313721, "learning_rate": 9.952261306532665e-06, "loss": 0.2554, "step": 975 }, { "epoch": 1.1520737327188941, "grad_norm": 5.350543022155762, "learning_rate": 9.949748743718594e-06, "loss": 0.2454, "step": 1000 }, { "epoch": 1.1520737327188941, "eval_loss": 0.5919212102890015, "eval_runtime": 79.0041, "eval_samples_per_second": 40.061, "eval_steps_per_second": 5.012, "eval_wer": 38.83541376143669, "step": 1000 }, { "epoch": 1.1808755760368663, "grad_norm": 4.945489883422852, "learning_rate": 9.947236180904523e-06, "loss": 0.2403, "step": 1025 }, { "epoch": 1.2096774193548387, "grad_norm": 5.548666477203369, "learning_rate": 9.944723618090453e-06, "loss": 0.2512, "step": 1050 }, { "epoch": 1.238479262672811, "grad_norm": 5.913753509521484, "learning_rate": 9.942211055276382e-06, "loss": 0.249, "step": 1075 }, { "epoch": 1.2672811059907834, "grad_norm": 5.161712169647217, "learning_rate": 9.939698492462311e-06, "loss": 0.2528, "step": 1100 }, { "epoch": 1.2960829493087558, "grad_norm": 5.848782062530518, "learning_rate": 9.937185929648242e-06, "loss": 0.2481, "step": 1125 }, { "epoch": 1.3248847926267282, "grad_norm": 5.374751091003418, "learning_rate": 9.934673366834172e-06, "loss": 0.2406, "step": 1150 }, { "epoch": 1.3536866359447004, "grad_norm": 5.660360336303711, "learning_rate": 9.932160804020101e-06, "loss": 0.2526, "step": 1175 }, { "epoch": 1.3824884792626728, "grad_norm": 6.462464332580566, "learning_rate": 9.929648241206032e-06, "loss": 0.2533, "step": 1200 }, { "epoch": 1.4112903225806452, "grad_norm": 6.463425636291504, "learning_rate": 9.92713567839196e-06, "loss": 0.2624, "step": 1225 }, { "epoch": 1.4400921658986174, "grad_norm": 6.0017781257629395, "learning_rate": 9.92462311557789e-06, "loss": 0.254, "step": 1250 }, { "epoch": 1.4688940092165899, "grad_norm": 5.497277736663818, "learning_rate": 9.92211055276382e-06, "loss": 0.255, "step": 1275 }, { "epoch": 1.4976958525345623, "grad_norm": 6.226492404937744, "learning_rate": 9.91959798994975e-06, "loss": 0.248, "step": 1300 }, { "epoch": 1.5264976958525347, "grad_norm": 6.530552387237549, "learning_rate": 9.917085427135679e-06, "loss": 0.26, "step": 1325 }, { "epoch": 1.555299539170507, "grad_norm": 6.080399036407471, "learning_rate": 9.914572864321608e-06, "loss": 0.2409, "step": 1350 }, { "epoch": 1.5841013824884793, "grad_norm": 6.376711845397949, "learning_rate": 9.912060301507539e-06, "loss": 0.2585, "step": 1375 }, { "epoch": 1.6129032258064515, "grad_norm": 6.269228935241699, "learning_rate": 9.909547738693468e-06, "loss": 0.2637, "step": 1400 }, { "epoch": 1.641705069124424, "grad_norm": 6.135796070098877, "learning_rate": 9.907035175879398e-06, "loss": 0.2582, "step": 1425 }, { "epoch": 1.6705069124423964, "grad_norm": 5.8433837890625, "learning_rate": 9.904522613065327e-06, "loss": 0.2442, "step": 1450 }, { "epoch": 1.6993087557603688, "grad_norm": 6.258557319641113, "learning_rate": 9.902010050251258e-06, "loss": 0.2404, "step": 1475 }, { "epoch": 1.728110599078341, "grad_norm": 6.063734531402588, "learning_rate": 9.899497487437186e-06, "loss": 0.2622, "step": 1500 }, { "epoch": 1.7569124423963134, "grad_norm": 6.452296257019043, "learning_rate": 9.896984924623117e-06, "loss": 0.2495, "step": 1525 }, { "epoch": 1.7857142857142856, "grad_norm": 5.900432109832764, "learning_rate": 9.894472361809046e-06, "loss": 0.2627, "step": 1550 }, { "epoch": 1.814516129032258, "grad_norm": 5.327248573303223, "learning_rate": 9.891959798994975e-06, "loss": 0.2457, "step": 1575 }, { "epoch": 1.8433179723502304, "grad_norm": 5.504421710968018, "learning_rate": 9.889447236180906e-06, "loss": 0.2469, "step": 1600 }, { "epoch": 1.8721198156682028, "grad_norm": 5.1115946769714355, "learning_rate": 9.886934673366836e-06, "loss": 0.2474, "step": 1625 }, { "epoch": 1.9009216589861753, "grad_norm": 6.488431930541992, "learning_rate": 9.884422110552765e-06, "loss": 0.2528, "step": 1650 }, { "epoch": 1.9297235023041475, "grad_norm": 5.7694478034973145, "learning_rate": 9.881909547738694e-06, "loss": 0.2684, "step": 1675 }, { "epoch": 1.9585253456221197, "grad_norm": 5.066587448120117, "learning_rate": 9.879396984924624e-06, "loss": 0.2589, "step": 1700 }, { "epoch": 1.987327188940092, "grad_norm": 6.58125114440918, "learning_rate": 9.876884422110553e-06, "loss": 0.2469, "step": 1725 }, { "epoch": 2.0161290322580645, "grad_norm": 5.03066349029541, "learning_rate": 9.874371859296484e-06, "loss": 0.2222, "step": 1750 }, { "epoch": 2.044930875576037, "grad_norm": 5.261024475097656, "learning_rate": 9.871859296482413e-06, "loss": 0.1924, "step": 1775 }, { "epoch": 2.0737327188940093, "grad_norm": 4.258754730224609, "learning_rate": 9.869346733668343e-06, "loss": 0.19, "step": 1800 }, { "epoch": 2.1025345622119818, "grad_norm": 5.060421466827393, "learning_rate": 9.866834170854272e-06, "loss": 0.1956, "step": 1825 }, { "epoch": 2.1313364055299537, "grad_norm": 4.822159767150879, "learning_rate": 9.864321608040201e-06, "loss": 0.1964, "step": 1850 }, { "epoch": 2.160138248847926, "grad_norm": 4.639535427093506, "learning_rate": 9.861809045226132e-06, "loss": 0.2008, "step": 1875 }, { "epoch": 2.1889400921658986, "grad_norm": 5.3064093589782715, "learning_rate": 9.859296482412062e-06, "loss": 0.1971, "step": 1900 }, { "epoch": 2.217741935483871, "grad_norm": 5.586013317108154, "learning_rate": 9.856783919597991e-06, "loss": 0.1916, "step": 1925 }, { "epoch": 2.2465437788018434, "grad_norm": 5.496710300445557, "learning_rate": 9.85427135678392e-06, "loss": 0.2018, "step": 1950 }, { "epoch": 2.275345622119816, "grad_norm": 5.848773002624512, "learning_rate": 9.85175879396985e-06, "loss": 0.1938, "step": 1975 }, { "epoch": 2.3041474654377883, "grad_norm": 5.33262825012207, "learning_rate": 9.84924623115578e-06, "loss": 0.1962, "step": 2000 }, { "epoch": 2.3041474654377883, "eval_loss": 0.5862352252006531, "eval_runtime": 82.9161, "eval_samples_per_second": 38.171, "eval_steps_per_second": 4.776, "eval_wer": 37.6483793068263, "step": 2000 }, { "epoch": 2.3329493087557602, "grad_norm": 5.573830604553223, "learning_rate": 9.84673366834171e-06, "loss": 0.2021, "step": 2025 }, { "epoch": 2.3617511520737327, "grad_norm": 5.0813517570495605, "learning_rate": 9.84422110552764e-06, "loss": 0.1952, "step": 2050 }, { "epoch": 2.390552995391705, "grad_norm": 5.276577949523926, "learning_rate": 9.841708542713569e-06, "loss": 0.1868, "step": 2075 }, { "epoch": 2.4193548387096775, "grad_norm": 5.183727741241455, "learning_rate": 9.83929648241206e-06, "loss": 0.205, "step": 2100 }, { "epoch": 2.44815668202765, "grad_norm": 6.511087417602539, "learning_rate": 9.836783919597991e-06, "loss": 0.2049, "step": 2125 }, { "epoch": 2.476958525345622, "grad_norm": 5.8599772453308105, "learning_rate": 9.83427135678392e-06, "loss": 0.2057, "step": 2150 }, { "epoch": 2.5057603686635943, "grad_norm": 4.825043201446533, "learning_rate": 9.83175879396985e-06, "loss": 0.2058, "step": 2175 }, { "epoch": 2.5345622119815667, "grad_norm": 5.699452877044678, "learning_rate": 9.82924623115578e-06, "loss": 0.2115, "step": 2200 }, { "epoch": 2.563364055299539, "grad_norm": 5.8178582191467285, "learning_rate": 9.826733668341708e-06, "loss": 0.2005, "step": 2225 }, { "epoch": 2.5921658986175116, "grad_norm": 5.104362964630127, "learning_rate": 9.82422110552764e-06, "loss": 0.1992, "step": 2250 }, { "epoch": 2.620967741935484, "grad_norm": 4.881691932678223, "learning_rate": 9.821708542713569e-06, "loss": 0.1853, "step": 2275 }, { "epoch": 2.6497695852534564, "grad_norm": 5.537844181060791, "learning_rate": 9.819195979899498e-06, "loss": 0.2013, "step": 2300 }, { "epoch": 2.678571428571429, "grad_norm": 6.034202575683594, "learning_rate": 9.816683417085427e-06, "loss": 0.2063, "step": 2325 }, { "epoch": 2.707373271889401, "grad_norm": 5.854854583740234, "learning_rate": 9.814170854271358e-06, "loss": 0.1948, "step": 2350 }, { "epoch": 2.736175115207373, "grad_norm": 5.279241561889648, "learning_rate": 9.811658291457288e-06, "loss": 0.205, "step": 2375 }, { "epoch": 2.7649769585253456, "grad_norm": 5.358358383178711, "learning_rate": 9.809145728643217e-06, "loss": 0.1982, "step": 2400 }, { "epoch": 2.793778801843318, "grad_norm": 5.770386219024658, "learning_rate": 9.806633165829146e-06, "loss": 0.1987, "step": 2425 }, { "epoch": 2.8225806451612905, "grad_norm": 5.92080020904541, "learning_rate": 9.804120603015076e-06, "loss": 0.2075, "step": 2450 }, { "epoch": 2.8513824884792625, "grad_norm": 5.451973915100098, "learning_rate": 9.801608040201007e-06, "loss": 0.2084, "step": 2475 }, { "epoch": 2.880184331797235, "grad_norm": 7.018340110778809, "learning_rate": 9.799095477386934e-06, "loss": 0.204, "step": 2500 }, { "epoch": 2.9089861751152073, "grad_norm": 5.575046062469482, "learning_rate": 9.796582914572865e-06, "loss": 0.2132, "step": 2525 }, { "epoch": 2.9377880184331797, "grad_norm": 6.1426615715026855, "learning_rate": 9.794070351758795e-06, "loss": 0.2016, "step": 2550 }, { "epoch": 2.966589861751152, "grad_norm": 5.2747907638549805, "learning_rate": 9.791557788944724e-06, "loss": 0.1933, "step": 2575 }, { "epoch": 2.9953917050691246, "grad_norm": 4.977904319763184, "learning_rate": 9.789045226130655e-06, "loss": 0.2015, "step": 2600 }, { "epoch": 3.024193548387097, "grad_norm": 4.180039405822754, "learning_rate": 9.786532663316584e-06, "loss": 0.1564, "step": 2625 }, { "epoch": 3.052995391705069, "grad_norm": 4.429443359375, "learning_rate": 9.784020100502514e-06, "loss": 0.1485, "step": 2650 }, { "epoch": 3.0817972350230414, "grad_norm": 4.283970832824707, "learning_rate": 9.781507537688443e-06, "loss": 0.1482, "step": 2675 }, { "epoch": 3.110599078341014, "grad_norm": 4.165538311004639, "learning_rate": 9.778994974874372e-06, "loss": 0.1543, "step": 2700 }, { "epoch": 3.139400921658986, "grad_norm": 4.36826753616333, "learning_rate": 9.776482412060302e-06, "loss": 0.1541, "step": 2725 }, { "epoch": 3.1682027649769586, "grad_norm": 4.2339396476745605, "learning_rate": 9.773969849246233e-06, "loss": 0.1483, "step": 2750 }, { "epoch": 3.197004608294931, "grad_norm": 4.818552494049072, "learning_rate": 9.77145728643216e-06, "loss": 0.1525, "step": 2775 }, { "epoch": 3.225806451612903, "grad_norm": 4.238425254821777, "learning_rate": 9.768944723618091e-06, "loss": 0.1507, "step": 2800 }, { "epoch": 3.2546082949308754, "grad_norm": 4.662484169006348, "learning_rate": 9.76643216080402e-06, "loss": 0.159, "step": 2825 }, { "epoch": 3.283410138248848, "grad_norm": 4.323627948760986, "learning_rate": 9.76391959798995e-06, "loss": 0.1575, "step": 2850 }, { "epoch": 3.3122119815668203, "grad_norm": 5.29601526260376, "learning_rate": 9.761407035175881e-06, "loss": 0.157, "step": 2875 }, { "epoch": 3.3410138248847927, "grad_norm": 4.703489780426025, "learning_rate": 9.75889447236181e-06, "loss": 0.1592, "step": 2900 }, { "epoch": 3.369815668202765, "grad_norm": 5.550471305847168, "learning_rate": 9.75638190954774e-06, "loss": 0.1458, "step": 2925 }, { "epoch": 3.3986175115207375, "grad_norm": 4.693049907684326, "learning_rate": 9.753869346733669e-06, "loss": 0.1535, "step": 2950 }, { "epoch": 3.4274193548387095, "grad_norm": 4.520618915557861, "learning_rate": 9.751356783919598e-06, "loss": 0.1595, "step": 2975 }, { "epoch": 3.456221198156682, "grad_norm": 5.345832824707031, "learning_rate": 9.74884422110553e-06, "loss": 0.1514, "step": 3000 }, { "epoch": 3.456221198156682, "eval_loss": 0.6002869009971619, "eval_runtime": 82.7201, "eval_samples_per_second": 38.262, "eval_steps_per_second": 4.787, "eval_wer": 37.22662494472977, "step": 3000 }, { "epoch": 3.4850230414746544, "grad_norm": 4.942008972167969, "learning_rate": 9.746331658291459e-06, "loss": 0.1476, "step": 3025 }, { "epoch": 3.513824884792627, "grad_norm": 6.1948394775390625, "learning_rate": 9.743819095477388e-06, "loss": 0.1689, "step": 3050 }, { "epoch": 3.542626728110599, "grad_norm": 5.902460098266602, "learning_rate": 9.741306532663317e-06, "loss": 0.158, "step": 3075 }, { "epoch": 3.571428571428571, "grad_norm": 5.279384136199951, "learning_rate": 9.738793969849247e-06, "loss": 0.1611, "step": 3100 }, { "epoch": 3.6002304147465436, "grad_norm": 5.252782344818115, "learning_rate": 9.736281407035176e-06, "loss": 0.1704, "step": 3125 }, { "epoch": 3.629032258064516, "grad_norm": 5.107100009918213, "learning_rate": 9.733768844221107e-06, "loss": 0.1567, "step": 3150 }, { "epoch": 3.6578341013824884, "grad_norm": 4.659441947937012, "learning_rate": 9.731256281407036e-06, "loss": 0.1565, "step": 3175 }, { "epoch": 3.686635944700461, "grad_norm": 4.246470928192139, "learning_rate": 9.728743718592966e-06, "loss": 0.1635, "step": 3200 }, { "epoch": 3.7154377880184333, "grad_norm": 5.37877082824707, "learning_rate": 9.726231155778897e-06, "loss": 0.1589, "step": 3225 }, { "epoch": 3.7442396313364057, "grad_norm": 4.6406426429748535, "learning_rate": 9.723718592964824e-06, "loss": 0.1613, "step": 3250 }, { "epoch": 3.773041474654378, "grad_norm": 5.088306903839111, "learning_rate": 9.721206030150755e-06, "loss": 0.1609, "step": 3275 }, { "epoch": 3.80184331797235, "grad_norm": 5.51893949508667, "learning_rate": 9.718693467336685e-06, "loss": 0.1617, "step": 3300 }, { "epoch": 3.8306451612903225, "grad_norm": 5.415011882781982, "learning_rate": 9.716180904522614e-06, "loss": 0.1684, "step": 3325 }, { "epoch": 3.859447004608295, "grad_norm": 5.6754655838012695, "learning_rate": 9.713668341708543e-06, "loss": 0.1587, "step": 3350 }, { "epoch": 3.8882488479262673, "grad_norm": 5.632762908935547, "learning_rate": 9.711155778894472e-06, "loss": 0.1596, "step": 3375 }, { "epoch": 3.9170506912442398, "grad_norm": 4.767159461975098, "learning_rate": 9.708643216080402e-06, "loss": 0.1678, "step": 3400 }, { "epoch": 3.9458525345622117, "grad_norm": 5.525444507598877, "learning_rate": 9.706130653266333e-06, "loss": 0.1632, "step": 3425 }, { "epoch": 3.974654377880184, "grad_norm": 5.185251235961914, "learning_rate": 9.703618090452262e-06, "loss": 0.1708, "step": 3450 }, { "epoch": 4.003456221198157, "grad_norm": 3.655832290649414, "learning_rate": 9.701105527638191e-06, "loss": 0.1516, "step": 3475 }, { "epoch": 4.032258064516129, "grad_norm": 4.749524116516113, "learning_rate": 9.698592964824122e-06, "loss": 0.1181, "step": 3500 }, { "epoch": 4.061059907834101, "grad_norm": 4.0963592529296875, "learning_rate": 9.69608040201005e-06, "loss": 0.1169, "step": 3525 }, { "epoch": 4.089861751152074, "grad_norm": 4.0425639152526855, "learning_rate": 9.693567839195981e-06, "loss": 0.1128, "step": 3550 }, { "epoch": 4.118663594470046, "grad_norm": 4.164423942565918, "learning_rate": 9.69105527638191e-06, "loss": 0.1197, "step": 3575 }, { "epoch": 4.147465437788019, "grad_norm": 4.198514461517334, "learning_rate": 9.68854271356784e-06, "loss": 0.1209, "step": 3600 }, { "epoch": 4.176267281105991, "grad_norm": 4.42313289642334, "learning_rate": 9.68603015075377e-06, "loss": 0.1198, "step": 3625 }, { "epoch": 4.2050691244239635, "grad_norm": 3.9469261169433594, "learning_rate": 9.683517587939698e-06, "loss": 0.1219, "step": 3650 }, { "epoch": 4.233870967741935, "grad_norm": 4.5506792068481445, "learning_rate": 9.68100502512563e-06, "loss": 0.1149, "step": 3675 }, { "epoch": 4.2626728110599075, "grad_norm": 4.171515464782715, "learning_rate": 9.678492462311559e-06, "loss": 0.1227, "step": 3700 }, { "epoch": 4.29147465437788, "grad_norm": 4.8595781326293945, "learning_rate": 9.675979899497488e-06, "loss": 0.1238, "step": 3725 }, { "epoch": 4.320276497695852, "grad_norm": 4.307956695556641, "learning_rate": 9.673467336683417e-06, "loss": 0.1219, "step": 3750 }, { "epoch": 4.349078341013825, "grad_norm": 4.462159633636475, "learning_rate": 9.670954773869348e-06, "loss": 0.1147, "step": 3775 }, { "epoch": 4.377880184331797, "grad_norm": 3.99963641166687, "learning_rate": 9.668442211055276e-06, "loss": 0.121, "step": 3800 }, { "epoch": 4.40668202764977, "grad_norm": 4.081131935119629, "learning_rate": 9.665929648241207e-06, "loss": 0.1184, "step": 3825 }, { "epoch": 4.435483870967742, "grad_norm": 4.748305320739746, "learning_rate": 9.663417085427136e-06, "loss": 0.1215, "step": 3850 }, { "epoch": 4.464285714285714, "grad_norm": 4.503684043884277, "learning_rate": 9.660904522613066e-06, "loss": 0.1219, "step": 3875 }, { "epoch": 4.493087557603687, "grad_norm": 4.571436405181885, "learning_rate": 9.658391959798997e-06, "loss": 0.1193, "step": 3900 }, { "epoch": 4.521889400921659, "grad_norm": 3.9383575916290283, "learning_rate": 9.655879396984924e-06, "loss": 0.1222, "step": 3925 }, { "epoch": 4.550691244239632, "grad_norm": 3.9949371814727783, "learning_rate": 9.653366834170855e-06, "loss": 0.123, "step": 3950 }, { "epoch": 4.579493087557603, "grad_norm": 4.606729984283447, "learning_rate": 9.650854271356785e-06, "loss": 0.1236, "step": 3975 }, { "epoch": 4.6082949308755765, "grad_norm": 3.996870517730713, "learning_rate": 9.648341708542714e-06, "loss": 0.1236, "step": 4000 }, { "epoch": 4.6082949308755765, "eval_loss": 0.6199113130569458, "eval_runtime": 81.3712, "eval_samples_per_second": 38.896, "eval_steps_per_second": 4.867, "eval_wer": 37.284446107275265, "step": 4000 }, { "epoch": 4.637096774193548, "grad_norm": 5.838872909545898, "learning_rate": 9.645829145728643e-06, "loss": 0.1291, "step": 4025 }, { "epoch": 4.6658986175115205, "grad_norm": 3.983415365219116, "learning_rate": 9.643316582914574e-06, "loss": 0.1266, "step": 4050 }, { "epoch": 4.694700460829493, "grad_norm": 4.9192705154418945, "learning_rate": 9.640804020100504e-06, "loss": 0.1258, "step": 4075 }, { "epoch": 4.723502304147465, "grad_norm": 4.198337078094482, "learning_rate": 9.638291457286433e-06, "loss": 0.125, "step": 4100 }, { "epoch": 4.752304147465438, "grad_norm": 4.538558483123779, "learning_rate": 9.635778894472362e-06, "loss": 0.1331, "step": 4125 }, { "epoch": 4.78110599078341, "grad_norm": 4.647339820861816, "learning_rate": 9.633266331658292e-06, "loss": 0.1325, "step": 4150 }, { "epoch": 4.809907834101383, "grad_norm": 4.335594177246094, "learning_rate": 9.630753768844223e-06, "loss": 0.1249, "step": 4175 }, { "epoch": 4.838709677419355, "grad_norm": 4.112964630126953, "learning_rate": 9.628341708542714e-06, "loss": 0.1346, "step": 4200 }, { "epoch": 4.867511520737327, "grad_norm": 5.007156848907471, "learning_rate": 9.625829145728644e-06, "loss": 0.1289, "step": 4225 }, { "epoch": 4.8963133640553, "grad_norm": 4.7816948890686035, "learning_rate": 9.623316582914573e-06, "loss": 0.1322, "step": 4250 }, { "epoch": 4.925115207373272, "grad_norm": 5.00559663772583, "learning_rate": 9.620804020100504e-06, "loss": 0.1278, "step": 4275 }, { "epoch": 4.953917050691244, "grad_norm": 3.7643561363220215, "learning_rate": 9.618291457286433e-06, "loss": 0.1239, "step": 4300 }, { "epoch": 4.982718894009217, "grad_norm": 4.690212249755859, "learning_rate": 9.615778894472363e-06, "loss": 0.1242, "step": 4325 }, { "epoch": 5.011520737327189, "grad_norm": 2.939924716949463, "learning_rate": 9.613266331658292e-06, "loss": 0.116, "step": 4350 }, { "epoch": 5.040322580645161, "grad_norm": 4.182240962982178, "learning_rate": 9.610753768844223e-06, "loss": 0.0895, "step": 4375 }, { "epoch": 5.0691244239631335, "grad_norm": 3.6691324710845947, "learning_rate": 9.60824120603015e-06, "loss": 0.0919, "step": 4400 }, { "epoch": 5.097926267281106, "grad_norm": 3.371245861053467, "learning_rate": 9.605728643216082e-06, "loss": 0.0922, "step": 4425 }, { "epoch": 5.126728110599078, "grad_norm": 3.1805381774902344, "learning_rate": 9.60321608040201e-06, "loss": 0.0878, "step": 4450 }, { "epoch": 5.155529953917051, "grad_norm": 3.0305440425872803, "learning_rate": 9.60070351758794e-06, "loss": 0.0864, "step": 4475 }, { "epoch": 5.184331797235023, "grad_norm": 4.845422267913818, "learning_rate": 9.598190954773871e-06, "loss": 0.0944, "step": 4500 }, { "epoch": 5.2131336405529956, "grad_norm": 4.524725914001465, "learning_rate": 9.595678391959799e-06, "loss": 0.0948, "step": 4525 }, { "epoch": 5.241935483870968, "grad_norm": 4.77572774887085, "learning_rate": 9.59316582914573e-06, "loss": 0.0917, "step": 4550 }, { "epoch": 5.27073732718894, "grad_norm": 3.662625312805176, "learning_rate": 9.590753768844221e-06, "loss": 0.0915, "step": 4575 }, { "epoch": 5.299539170506913, "grad_norm": 3.541950225830078, "learning_rate": 9.58824120603015e-06, "loss": 0.0939, "step": 4600 }, { "epoch": 5.328341013824884, "grad_norm": 4.37388801574707, "learning_rate": 9.585728643216082e-06, "loss": 0.0949, "step": 4625 }, { "epoch": 5.357142857142857, "grad_norm": 3.6708154678344727, "learning_rate": 9.583216080402011e-06, "loss": 0.0909, "step": 4650 }, { "epoch": 5.385944700460829, "grad_norm": 3.9196252822875977, "learning_rate": 9.58070351758794e-06, "loss": 0.0946, "step": 4675 }, { "epoch": 5.414746543778802, "grad_norm": 4.227769374847412, "learning_rate": 9.57819095477387e-06, "loss": 0.0983, "step": 4700 }, { "epoch": 5.443548387096774, "grad_norm": 3.827526807785034, "learning_rate": 9.575678391959799e-06, "loss": 0.0917, "step": 4725 }, { "epoch": 5.472350230414746, "grad_norm": 3.860154151916504, "learning_rate": 9.57316582914573e-06, "loss": 0.0876, "step": 4750 }, { "epoch": 5.501152073732719, "grad_norm": 3.444704532623291, "learning_rate": 9.57065326633166e-06, "loss": 0.0914, "step": 4775 }, { "epoch": 5.529953917050691, "grad_norm": 3.8155128955841064, "learning_rate": 9.568140703517589e-06, "loss": 0.0941, "step": 4800 }, { "epoch": 5.558755760368664, "grad_norm": 3.697683572769165, "learning_rate": 9.565628140703518e-06, "loss": 0.0956, "step": 4825 }, { "epoch": 5.587557603686636, "grad_norm": 3.7999086380004883, "learning_rate": 9.563115577889447e-06, "loss": 0.095, "step": 4850 }, { "epoch": 5.6163594470046085, "grad_norm": 3.67737078666687, "learning_rate": 9.560603015075378e-06, "loss": 0.1011, "step": 4875 }, { "epoch": 5.645161290322581, "grad_norm": 4.022425174713135, "learning_rate": 9.558090452261308e-06, "loss": 0.0985, "step": 4900 }, { "epoch": 5.673963133640553, "grad_norm": 4.5046892166137695, "learning_rate": 9.555577889447237e-06, "loss": 0.1047, "step": 4925 }, { "epoch": 5.702764976958525, "grad_norm": 3.7830488681793213, "learning_rate": 9.553065326633166e-06, "loss": 0.0877, "step": 4950 }, { "epoch": 5.731566820276497, "grad_norm": 4.597214698791504, "learning_rate": 9.550552763819096e-06, "loss": 0.0979, "step": 4975 }, { "epoch": 5.76036866359447, "grad_norm": 4.325615882873535, "learning_rate": 9.548040201005025e-06, "loss": 0.1031, "step": 5000 }, { "epoch": 5.76036866359447, "eval_loss": 0.6474053859710693, "eval_runtime": 81.458, "eval_samples_per_second": 38.854, "eval_steps_per_second": 4.861, "eval_wer": 37.82524403931839, "step": 5000 }, { "epoch": 5.789170506912442, "grad_norm": 3.4761712551116943, "learning_rate": 9.545527638190956e-06, "loss": 0.0938, "step": 5025 }, { "epoch": 5.817972350230415, "grad_norm": 4.495656967163086, "learning_rate": 9.543015075376885e-06, "loss": 0.0957, "step": 5050 }, { "epoch": 5.846774193548387, "grad_norm": 3.7624871730804443, "learning_rate": 9.540502512562815e-06, "loss": 0.1021, "step": 5075 }, { "epoch": 5.875576036866359, "grad_norm": 4.013946533203125, "learning_rate": 9.537989949748746e-06, "loss": 0.0979, "step": 5100 }, { "epoch": 5.904377880184332, "grad_norm": 4.008698463439941, "learning_rate": 9.535477386934673e-06, "loss": 0.0933, "step": 5125 }, { "epoch": 5.933179723502304, "grad_norm": 4.813544750213623, "learning_rate": 9.532964824120604e-06, "loss": 0.0986, "step": 5150 }, { "epoch": 5.961981566820277, "grad_norm": 4.452573299407959, "learning_rate": 9.530452261306534e-06, "loss": 0.1007, "step": 5175 }, { "epoch": 5.990783410138249, "grad_norm": 5.0860090255737305, "learning_rate": 9.527939698492463e-06, "loss": 0.0995, "step": 5200 }, { "epoch": 6.0195852534562215, "grad_norm": 3.1574621200561523, "learning_rate": 9.525427135678392e-06, "loss": 0.0841, "step": 5225 }, { "epoch": 6.048387096774194, "grad_norm": 5.122866153717041, "learning_rate": 9.522914572864322e-06, "loss": 0.0652, "step": 5250 }, { "epoch": 6.0771889400921655, "grad_norm": 3.2906594276428223, "learning_rate": 9.520402010050253e-06, "loss": 0.0714, "step": 5275 }, { "epoch": 6.105990783410138, "grad_norm": 3.3200013637542725, "learning_rate": 9.517889447236182e-06, "loss": 0.0704, "step": 5300 }, { "epoch": 6.13479262672811, "grad_norm": 3.5484490394592285, "learning_rate": 9.515376884422111e-06, "loss": 0.0685, "step": 5325 }, { "epoch": 6.163594470046083, "grad_norm": 4.063160419464111, "learning_rate": 9.51286432160804e-06, "loss": 0.0704, "step": 5350 }, { "epoch": 6.192396313364055, "grad_norm": 3.2288167476654053, "learning_rate": 9.510351758793972e-06, "loss": 0.0682, "step": 5375 }, { "epoch": 6.221198156682028, "grad_norm": 3.0462543964385986, "learning_rate": 9.5078391959799e-06, "loss": 0.0714, "step": 5400 }, { "epoch": 6.25, "grad_norm": 3.731647253036499, "learning_rate": 9.50532663316583e-06, "loss": 0.0696, "step": 5425 }, { "epoch": 6.278801843317972, "grad_norm": 3.4777073860168457, "learning_rate": 9.50281407035176e-06, "loss": 0.0681, "step": 5450 }, { "epoch": 6.307603686635945, "grad_norm": 2.9124643802642822, "learning_rate": 9.500301507537689e-06, "loss": 0.0659, "step": 5475 }, { "epoch": 6.336405529953917, "grad_norm": 3.222686767578125, "learning_rate": 9.49778894472362e-06, "loss": 0.0728, "step": 5500 }, { "epoch": 6.36520737327189, "grad_norm": 3.0681943893432617, "learning_rate": 9.49527638190955e-06, "loss": 0.0728, "step": 5525 }, { "epoch": 6.394009216589862, "grad_norm": 5.52202033996582, "learning_rate": 9.492763819095479e-06, "loss": 0.0705, "step": 5550 }, { "epoch": 6.4228110599078345, "grad_norm": 2.7906172275543213, "learning_rate": 9.490251256281408e-06, "loss": 0.0688, "step": 5575 }, { "epoch": 6.451612903225806, "grad_norm": 4.448217868804932, "learning_rate": 9.487738693467337e-06, "loss": 0.0724, "step": 5600 }, { "epoch": 6.4804147465437785, "grad_norm": 3.9422543048858643, "learning_rate": 9.485226130653267e-06, "loss": 0.0757, "step": 5625 }, { "epoch": 6.509216589861751, "grad_norm": 3.492337465286255, "learning_rate": 9.482713567839198e-06, "loss": 0.073, "step": 5650 }, { "epoch": 6.538018433179723, "grad_norm": 3.891918420791626, "learning_rate": 9.480201005025125e-06, "loss": 0.0704, "step": 5675 }, { "epoch": 6.566820276497696, "grad_norm": 3.391314744949341, "learning_rate": 9.477688442211056e-06, "loss": 0.0676, "step": 5700 }, { "epoch": 6.595622119815668, "grad_norm": 3.971620559692383, "learning_rate": 9.475175879396985e-06, "loss": 0.0741, "step": 5725 }, { "epoch": 6.624423963133641, "grad_norm": 3.427788496017456, "learning_rate": 9.472663316582915e-06, "loss": 0.0762, "step": 5750 }, { "epoch": 6.653225806451613, "grad_norm": 3.3822622299194336, "learning_rate": 9.470150753768846e-06, "loss": 0.0711, "step": 5775 }, { "epoch": 6.682027649769585, "grad_norm": 3.0213544368743896, "learning_rate": 9.467638190954775e-06, "loss": 0.074, "step": 5800 }, { "epoch": 6.710829493087558, "grad_norm": 3.728795289993286, "learning_rate": 9.465125628140704e-06, "loss": 0.0715, "step": 5825 }, { "epoch": 6.73963133640553, "grad_norm": 3.7396397590637207, "learning_rate": 9.462613065326634e-06, "loss": 0.0757, "step": 5850 }, { "epoch": 6.768433179723503, "grad_norm": 4.286227226257324, "learning_rate": 9.460100502512563e-06, "loss": 0.072, "step": 5875 }, { "epoch": 6.797235023041475, "grad_norm": 3.382040500640869, "learning_rate": 9.457587939698494e-06, "loss": 0.0731, "step": 5900 }, { "epoch": 6.826036866359447, "grad_norm": 3.8695623874664307, "learning_rate": 9.455075376884423e-06, "loss": 0.0735, "step": 5925 }, { "epoch": 6.854838709677419, "grad_norm": 3.1111221313476562, "learning_rate": 9.452562814070353e-06, "loss": 0.0721, "step": 5950 }, { "epoch": 6.8836405529953915, "grad_norm": 4.860044002532959, "learning_rate": 9.450050251256282e-06, "loss": 0.0766, "step": 5975 }, { "epoch": 6.912442396313364, "grad_norm": 8.141301155090332, "learning_rate": 9.447537688442211e-06, "loss": 0.0773, "step": 6000 }, { "epoch": 6.912442396313364, "eval_loss": 0.667895495891571, "eval_runtime": 80.6955, "eval_samples_per_second": 39.221, "eval_steps_per_second": 4.907, "eval_wer": 37.927281384986905, "step": 6000 }, { "epoch": 6.941244239631336, "grad_norm": 4.5540571212768555, "learning_rate": 9.44502512562814e-06, "loss": 0.0771, "step": 6025 }, { "epoch": 6.970046082949309, "grad_norm": 4.217947483062744, "learning_rate": 9.442512562814072e-06, "loss": 0.0782, "step": 6050 }, { "epoch": 6.998847926267281, "grad_norm": 4.242874622344971, "learning_rate": 9.440000000000001e-06, "loss": 0.0713, "step": 6075 }, { "epoch": 7.027649769585254, "grad_norm": 2.195582866668701, "learning_rate": 9.43748743718593e-06, "loss": 0.0509, "step": 6100 }, { "epoch": 7.056451612903226, "grad_norm": 2.6373801231384277, "learning_rate": 9.43497487437186e-06, "loss": 0.0504, "step": 6125 }, { "epoch": 7.085253456221198, "grad_norm": 2.5395350456237793, "learning_rate": 9.432462311557789e-06, "loss": 0.051, "step": 6150 }, { "epoch": 7.114055299539171, "grad_norm": 2.2693610191345215, "learning_rate": 9.42994974874372e-06, "loss": 0.0504, "step": 6175 }, { "epoch": 7.142857142857143, "grad_norm": 2.4371633529663086, "learning_rate": 9.42743718592965e-06, "loss": 0.0521, "step": 6200 }, { "epoch": 7.171658986175116, "grad_norm": 2.7629618644714355, "learning_rate": 9.424924623115579e-06, "loss": 0.051, "step": 6225 }, { "epoch": 7.200460829493087, "grad_norm": 3.0735747814178467, "learning_rate": 9.422412060301508e-06, "loss": 0.0524, "step": 6250 }, { "epoch": 7.22926267281106, "grad_norm": 2.8185479640960693, "learning_rate": 9.419899497487437e-06, "loss": 0.0536, "step": 6275 }, { "epoch": 7.258064516129032, "grad_norm": 2.6703684329986572, "learning_rate": 9.417386934673367e-06, "loss": 0.0479, "step": 6300 }, { "epoch": 7.2868663594470044, "grad_norm": 2.4663658142089844, "learning_rate": 9.414874371859298e-06, "loss": 0.0502, "step": 6325 }, { "epoch": 7.315668202764977, "grad_norm": 3.120025634765625, "learning_rate": 9.412361809045227e-06, "loss": 0.054, "step": 6350 }, { "epoch": 7.344470046082949, "grad_norm": 2.643721103668213, "learning_rate": 9.409849246231156e-06, "loss": 0.0514, "step": 6375 }, { "epoch": 7.373271889400922, "grad_norm": 2.536241054534912, "learning_rate": 9.407336683417086e-06, "loss": 0.0515, "step": 6400 }, { "epoch": 7.402073732718894, "grad_norm": 2.8527159690856934, "learning_rate": 9.404824120603015e-06, "loss": 0.0545, "step": 6425 }, { "epoch": 7.4308755760368665, "grad_norm": 2.7098474502563477, "learning_rate": 9.402311557788946e-06, "loss": 0.053, "step": 6450 }, { "epoch": 7.459677419354839, "grad_norm": 2.6858792304992676, "learning_rate": 9.399798994974875e-06, "loss": 0.0528, "step": 6475 }, { "epoch": 7.488479262672811, "grad_norm": 2.2096633911132812, "learning_rate": 9.397286432160805e-06, "loss": 0.0515, "step": 6500 }, { "epoch": 7.517281105990783, "grad_norm": 2.750530481338501, "learning_rate": 9.394773869346736e-06, "loss": 0.0559, "step": 6525 }, { "epoch": 7.546082949308756, "grad_norm": 3.047398090362549, "learning_rate": 9.392261306532663e-06, "loss": 0.0569, "step": 6550 }, { "epoch": 7.574884792626728, "grad_norm": 3.783179759979248, "learning_rate": 9.389748743718594e-06, "loss": 0.0552, "step": 6575 }, { "epoch": 7.6036866359447, "grad_norm": 2.9773595333099365, "learning_rate": 9.387236180904524e-06, "loss": 0.0562, "step": 6600 }, { "epoch": 7.632488479262673, "grad_norm": 5.750194549560547, "learning_rate": 9.384723618090453e-06, "loss": 0.0561, "step": 6625 }, { "epoch": 7.661290322580645, "grad_norm": 2.4250428676605225, "learning_rate": 9.382211055276382e-06, "loss": 0.0524, "step": 6650 }, { "epoch": 7.690092165898617, "grad_norm": 4.601222038269043, "learning_rate": 9.379698492462312e-06, "loss": 0.0527, "step": 6675 }, { "epoch": 7.71889400921659, "grad_norm": 2.6401073932647705, "learning_rate": 9.377185929648241e-06, "loss": 0.0572, "step": 6700 }, { "epoch": 7.747695852534562, "grad_norm": 2.8957459926605225, "learning_rate": 9.374673366834172e-06, "loss": 0.0561, "step": 6725 }, { "epoch": 7.776497695852535, "grad_norm": 2.5037074089050293, "learning_rate": 9.372160804020101e-06, "loss": 0.0543, "step": 6750 }, { "epoch": 7.805299539170507, "grad_norm": 2.765925168991089, "learning_rate": 9.36964824120603e-06, "loss": 0.0535, "step": 6775 }, { "epoch": 7.8341013824884795, "grad_norm": 3.2086360454559326, "learning_rate": 9.367135678391962e-06, "loss": 0.0549, "step": 6800 }, { "epoch": 7.862903225806452, "grad_norm": 4.579031944274902, "learning_rate": 9.36462311557789e-06, "loss": 0.0559, "step": 6825 }, { "epoch": 7.8917050691244235, "grad_norm": 3.1290481090545654, "learning_rate": 9.36211055276382e-06, "loss": 0.0556, "step": 6850 }, { "epoch": 7.920506912442397, "grad_norm": 3.776870012283325, "learning_rate": 9.35959798994975e-06, "loss": 0.0563, "step": 6875 }, { "epoch": 7.949308755760368, "grad_norm": 3.926382303237915, "learning_rate": 9.357085427135679e-06, "loss": 0.0556, "step": 6900 }, { "epoch": 7.978110599078341, "grad_norm": 6.491419315338135, "learning_rate": 9.354572864321608e-06, "loss": 0.0567, "step": 6925 }, { "epoch": 8.006912442396313, "grad_norm": 2.15350604057312, "learning_rate": 9.352060301507538e-06, "loss": 0.0507, "step": 6950 }, { "epoch": 8.035714285714286, "grad_norm": 5.344366550445557, "learning_rate": 9.349547738693469e-06, "loss": 0.0362, "step": 6975 }, { "epoch": 8.064516129032258, "grad_norm": 3.120136022567749, "learning_rate": 9.347035175879398e-06, "loss": 0.037, "step": 7000 }, { "epoch": 8.064516129032258, "eval_loss": 0.6994675397872925, "eval_runtime": 80.772, "eval_samples_per_second": 39.184, "eval_steps_per_second": 4.903, "eval_wer": 37.81844154960716, "step": 7000 }, { "epoch": 8.09331797235023, "grad_norm": 2.118480682373047, "learning_rate": 9.344522613065327e-06, "loss": 0.0346, "step": 7025 }, { "epoch": 8.122119815668203, "grad_norm": 2.259484052658081, "learning_rate": 9.342010050251257e-06, "loss": 0.0375, "step": 7050 }, { "epoch": 8.150921658986174, "grad_norm": 2.2805166244506836, "learning_rate": 9.339497487437188e-06, "loss": 0.0378, "step": 7075 }, { "epoch": 8.179723502304148, "grad_norm": 1.838012933731079, "learning_rate": 9.337085427135679e-06, "loss": 0.0383, "step": 7100 }, { "epoch": 8.20852534562212, "grad_norm": 2.236102342605591, "learning_rate": 9.334572864321608e-06, "loss": 0.0385, "step": 7125 }, { "epoch": 8.237327188940093, "grad_norm": 2.633634328842163, "learning_rate": 9.332060301507538e-06, "loss": 0.0364, "step": 7150 }, { "epoch": 8.266129032258064, "grad_norm": 2.3115463256835938, "learning_rate": 9.329547738693469e-06, "loss": 0.0388, "step": 7175 }, { "epoch": 8.294930875576037, "grad_norm": 1.9687660932540894, "learning_rate": 9.327035175879398e-06, "loss": 0.0397, "step": 7200 }, { "epoch": 8.323732718894009, "grad_norm": 2.1913368701934814, "learning_rate": 9.324522613065327e-06, "loss": 0.0399, "step": 7225 }, { "epoch": 8.352534562211982, "grad_norm": 2.3736586570739746, "learning_rate": 9.322010050251257e-06, "loss": 0.0364, "step": 7250 }, { "epoch": 8.381336405529954, "grad_norm": 2.264538049697876, "learning_rate": 9.319497487437186e-06, "loss": 0.0362, "step": 7275 }, { "epoch": 8.410138248847927, "grad_norm": 2.5420360565185547, "learning_rate": 9.316984924623115e-06, "loss": 0.0376, "step": 7300 }, { "epoch": 8.438940092165899, "grad_norm": 4.384249687194824, "learning_rate": 9.314472361809046e-06, "loss": 0.0408, "step": 7325 }, { "epoch": 8.46774193548387, "grad_norm": 2.6533255577087402, "learning_rate": 9.311959798994976e-06, "loss": 0.0385, "step": 7350 }, { "epoch": 8.496543778801843, "grad_norm": 2.9183216094970703, "learning_rate": 9.309447236180905e-06, "loss": 0.0391, "step": 7375 }, { "epoch": 8.525345622119815, "grad_norm": 2.423549175262451, "learning_rate": 9.306934673366836e-06, "loss": 0.0416, "step": 7400 }, { "epoch": 8.554147465437788, "grad_norm": 2.5606589317321777, "learning_rate": 9.304422110552764e-06, "loss": 0.0398, "step": 7425 }, { "epoch": 8.58294930875576, "grad_norm": 2.9090375900268555, "learning_rate": 9.301909547738695e-06, "loss": 0.0384, "step": 7450 }, { "epoch": 8.611751152073733, "grad_norm": 3.5338408946990967, "learning_rate": 9.299396984924624e-06, "loss": 0.037, "step": 7475 }, { "epoch": 8.640552995391705, "grad_norm": 2.407071590423584, "learning_rate": 9.296884422110553e-06, "loss": 0.0421, "step": 7500 }, { "epoch": 8.669354838709678, "grad_norm": 2.7173545360565186, "learning_rate": 9.294371859296483e-06, "loss": 0.0388, "step": 7525 }, { "epoch": 8.69815668202765, "grad_norm": 3.189429759979248, "learning_rate": 9.291859296482412e-06, "loss": 0.0392, "step": 7550 }, { "epoch": 8.726958525345623, "grad_norm": 2.881145477294922, "learning_rate": 9.289346733668343e-06, "loss": 0.041, "step": 7575 }, { "epoch": 8.755760368663594, "grad_norm": 1.8787224292755127, "learning_rate": 9.286834170854272e-06, "loss": 0.0433, "step": 7600 }, { "epoch": 8.784562211981568, "grad_norm": 2.367095947265625, "learning_rate": 9.284321608040202e-06, "loss": 0.0414, "step": 7625 }, { "epoch": 8.81336405529954, "grad_norm": 2.651494264602661, "learning_rate": 9.281809045226131e-06, "loss": 0.0401, "step": 7650 }, { "epoch": 8.842165898617512, "grad_norm": 2.554684638977051, "learning_rate": 9.279296482412062e-06, "loss": 0.0423, "step": 7675 }, { "epoch": 8.870967741935484, "grad_norm": 2.4025726318359375, "learning_rate": 9.27678391959799e-06, "loss": 0.0396, "step": 7700 }, { "epoch": 8.899769585253456, "grad_norm": 2.795933485031128, "learning_rate": 9.27427135678392e-06, "loss": 0.0415, "step": 7725 }, { "epoch": 8.928571428571429, "grad_norm": 4.275112628936768, "learning_rate": 9.27175879396985e-06, "loss": 0.0409, "step": 7750 }, { "epoch": 8.9573732718894, "grad_norm": 2.412587881088257, "learning_rate": 9.26924623115578e-06, "loss": 0.0398, "step": 7775 }, { "epoch": 8.986175115207374, "grad_norm": 2.7251672744750977, "learning_rate": 9.26673366834171e-06, "loss": 0.0434, "step": 7800 }, { "epoch": 9.014976958525345, "grad_norm": 1.5067108869552612, "learning_rate": 9.264221105527638e-06, "loss": 0.0339, "step": 7825 }, { "epoch": 9.043778801843319, "grad_norm": 2.626227617263794, "learning_rate": 9.261708542713569e-06, "loss": 0.0267, "step": 7850 }, { "epoch": 9.07258064516129, "grad_norm": 1.8383318185806274, "learning_rate": 9.259195979899498e-06, "loss": 0.0268, "step": 7875 }, { "epoch": 9.101382488479263, "grad_norm": 1.8432179689407349, "learning_rate": 9.256683417085428e-06, "loss": 0.0275, "step": 7900 }, { "epoch": 9.130184331797235, "grad_norm": 3.637984275817871, "learning_rate": 9.254170854271357e-06, "loss": 0.0285, "step": 7925 }, { "epoch": 9.158986175115208, "grad_norm": 2.2877626419067383, "learning_rate": 9.251658291457288e-06, "loss": 0.027, "step": 7950 }, { "epoch": 9.18778801843318, "grad_norm": 1.6793689727783203, "learning_rate": 9.249145728643217e-06, "loss": 0.0279, "step": 7975 }, { "epoch": 9.216589861751151, "grad_norm": 2.4668664932250977, "learning_rate": 9.246633165829147e-06, "loss": 0.028, "step": 8000 }, { "epoch": 9.216589861751151, "eval_loss": 0.730718195438385, "eval_runtime": 81.8811, "eval_samples_per_second": 38.654, "eval_steps_per_second": 4.836, "eval_wer": 38.43406686847386, "step": 8000 }, { "epoch": 9.245391705069125, "grad_norm": 1.7751166820526123, "learning_rate": 9.244120603015076e-06, "loss": 0.026, "step": 8025 }, { "epoch": 9.274193548387096, "grad_norm": 1.9020662307739258, "learning_rate": 9.241608040201005e-06, "loss": 0.0254, "step": 8050 }, { "epoch": 9.30299539170507, "grad_norm": 1.524022102355957, "learning_rate": 9.239095477386936e-06, "loss": 0.0276, "step": 8075 }, { "epoch": 9.331797235023041, "grad_norm": 1.5596990585327148, "learning_rate": 9.236582914572864e-06, "loss": 0.0293, "step": 8100 }, { "epoch": 9.360599078341014, "grad_norm": 1.9601891040802002, "learning_rate": 9.234070351758795e-06, "loss": 0.029, "step": 8125 }, { "epoch": 9.389400921658986, "grad_norm": 2.091620922088623, "learning_rate": 9.231557788944724e-06, "loss": 0.0305, "step": 8150 }, { "epoch": 9.418202764976959, "grad_norm": 2.122602939605713, "learning_rate": 9.229045226130654e-06, "loss": 0.0306, "step": 8175 }, { "epoch": 9.44700460829493, "grad_norm": 1.8378323316574097, "learning_rate": 9.226532663316585e-06, "loss": 0.0271, "step": 8200 }, { "epoch": 9.475806451612904, "grad_norm": 1.8325691223144531, "learning_rate": 9.224020100502514e-06, "loss": 0.0282, "step": 8225 }, { "epoch": 9.504608294930875, "grad_norm": 2.0037741661071777, "learning_rate": 9.221507537688443e-06, "loss": 0.0263, "step": 8250 }, { "epoch": 9.533410138248849, "grad_norm": 3.9526848793029785, "learning_rate": 9.218994974874373e-06, "loss": 0.0272, "step": 8275 }, { "epoch": 9.56221198156682, "grad_norm": 1.7334246635437012, "learning_rate": 9.216482412060302e-06, "loss": 0.0283, "step": 8300 }, { "epoch": 9.591013824884792, "grad_norm": 1.8288161754608154, "learning_rate": 9.213969849246231e-06, "loss": 0.0303, "step": 8325 }, { "epoch": 9.619815668202765, "grad_norm": 1.877044677734375, "learning_rate": 9.211457286432162e-06, "loss": 0.0333, "step": 8350 }, { "epoch": 9.648617511520737, "grad_norm": 2.6431884765625, "learning_rate": 9.20894472361809e-06, "loss": 0.0307, "step": 8375 }, { "epoch": 9.67741935483871, "grad_norm": 3.2045183181762695, "learning_rate": 9.206432160804021e-06, "loss": 0.0286, "step": 8400 }, { "epoch": 9.706221198156681, "grad_norm": 2.2130846977233887, "learning_rate": 9.20391959798995e-06, "loss": 0.0279, "step": 8425 }, { "epoch": 9.735023041474655, "grad_norm": 2.515073537826538, "learning_rate": 9.20140703517588e-06, "loss": 0.028, "step": 8450 }, { "epoch": 9.763824884792626, "grad_norm": 2.2594313621520996, "learning_rate": 9.19889447236181e-06, "loss": 0.0276, "step": 8475 }, { "epoch": 9.7926267281106, "grad_norm": 1.7898566722869873, "learning_rate": 9.19638190954774e-06, "loss": 0.0297, "step": 8500 }, { "epoch": 9.821428571428571, "grad_norm": 2.085634231567383, "learning_rate": 9.19386934673367e-06, "loss": 0.0269, "step": 8525 }, { "epoch": 9.850230414746544, "grad_norm": 3.9735524654388428, "learning_rate": 9.191356783919599e-06, "loss": 0.0309, "step": 8550 }, { "epoch": 9.879032258064516, "grad_norm": 4.49285364151001, "learning_rate": 9.188844221105528e-06, "loss": 0.0268, "step": 8575 }, { "epoch": 9.907834101382488, "grad_norm": 1.7210360765457153, "learning_rate": 9.186331658291459e-06, "loss": 0.0303, "step": 8600 }, { "epoch": 9.93663594470046, "grad_norm": 4.3028106689453125, "learning_rate": 9.183819095477388e-06, "loss": 0.028, "step": 8625 }, { "epoch": 9.965437788018432, "grad_norm": 2.0157089233398438, "learning_rate": 9.181306532663317e-06, "loss": 0.0302, "step": 8650 }, { "epoch": 9.994239631336406, "grad_norm": 2.1320486068725586, "learning_rate": 9.178793969849247e-06, "loss": 0.03, "step": 8675 }, { "epoch": 10.023041474654377, "grad_norm": 1.2078992128372192, "learning_rate": 9.176281407035176e-06, "loss": 0.0203, "step": 8700 }, { "epoch": 10.05184331797235, "grad_norm": 1.1216517686843872, "learning_rate": 9.173768844221105e-06, "loss": 0.0187, "step": 8725 }, { "epoch": 10.080645161290322, "grad_norm": 1.3258930444717407, "learning_rate": 9.171256281407036e-06, "loss": 0.0177, "step": 8750 }, { "epoch": 10.109447004608295, "grad_norm": 2.105889081954956, "learning_rate": 9.168743718592966e-06, "loss": 0.0199, "step": 8775 }, { "epoch": 10.138248847926267, "grad_norm": 1.7468386888504028, "learning_rate": 9.166231155778895e-06, "loss": 0.0199, "step": 8800 }, { "epoch": 10.16705069124424, "grad_norm": 2.7161872386932373, "learning_rate": 9.163718592964826e-06, "loss": 0.0207, "step": 8825 }, { "epoch": 10.195852534562212, "grad_norm": 1.298128366470337, "learning_rate": 9.161206030150754e-06, "loss": 0.0195, "step": 8850 }, { "epoch": 10.224654377880185, "grad_norm": 4.449765205383301, "learning_rate": 9.158693467336685e-06, "loss": 0.02, "step": 8875 }, { "epoch": 10.253456221198157, "grad_norm": 1.2394945621490479, "learning_rate": 9.156180904522614e-06, "loss": 0.019, "step": 8900 }, { "epoch": 10.28225806451613, "grad_norm": 1.2460911273956299, "learning_rate": 9.153668341708543e-06, "loss": 0.0206, "step": 8925 }, { "epoch": 10.311059907834101, "grad_norm": 1.8825390338897705, "learning_rate": 9.151155778894473e-06, "loss": 0.0192, "step": 8950 }, { "epoch": 10.339861751152073, "grad_norm": 2.195871591567993, "learning_rate": 9.148643216080402e-06, "loss": 0.0227, "step": 8975 }, { "epoch": 10.368663594470046, "grad_norm": 2.0863699913024902, "learning_rate": 9.146130653266331e-06, "loss": 0.0201, "step": 9000 }, { "epoch": 10.368663594470046, "eval_loss": 0.7542567849159241, "eval_runtime": 81.2831, "eval_samples_per_second": 38.938, "eval_steps_per_second": 4.872, "eval_wer": 38.38644944049523, "step": 9000 }, { "epoch": 10.397465437788018, "grad_norm": 2.0956614017486572, "learning_rate": 9.143618090452262e-06, "loss": 0.021, "step": 9025 }, { "epoch": 10.426267281105991, "grad_norm": 1.9071835279464722, "learning_rate": 9.141105527638192e-06, "loss": 0.0212, "step": 9050 }, { "epoch": 10.455069124423963, "grad_norm": 1.6553027629852295, "learning_rate": 9.138592964824121e-06, "loss": 0.0211, "step": 9075 }, { "epoch": 10.483870967741936, "grad_norm": 1.6388055086135864, "learning_rate": 9.136080402010052e-06, "loss": 0.0199, "step": 9100 }, { "epoch": 10.512672811059907, "grad_norm": 5.796273708343506, "learning_rate": 9.13356783919598e-06, "loss": 0.0221, "step": 9125 }, { "epoch": 10.54147465437788, "grad_norm": 2.8014345169067383, "learning_rate": 9.13105527638191e-06, "loss": 0.0231, "step": 9150 }, { "epoch": 10.570276497695852, "grad_norm": 2.1984214782714844, "learning_rate": 9.12854271356784e-06, "loss": 0.0196, "step": 9175 }, { "epoch": 10.599078341013826, "grad_norm": 4.029669284820557, "learning_rate": 9.12603015075377e-06, "loss": 0.0209, "step": 9200 }, { "epoch": 10.627880184331797, "grad_norm": 1.6487455368041992, "learning_rate": 9.1235175879397e-06, "loss": 0.0219, "step": 9225 }, { "epoch": 10.656682027649769, "grad_norm": 4.169697284698486, "learning_rate": 9.121005025125628e-06, "loss": 0.0213, "step": 9250 }, { "epoch": 10.685483870967742, "grad_norm": 1.2574188709259033, "learning_rate": 9.118492462311559e-06, "loss": 0.0211, "step": 9275 }, { "epoch": 10.714285714285714, "grad_norm": 1.958390474319458, "learning_rate": 9.115979899497488e-06, "loss": 0.0198, "step": 9300 }, { "epoch": 10.743087557603687, "grad_norm": 1.744756817817688, "learning_rate": 9.113467336683418e-06, "loss": 0.0201, "step": 9325 }, { "epoch": 10.771889400921658, "grad_norm": 2.4313881397247314, "learning_rate": 9.110954773869347e-06, "loss": 0.0224, "step": 9350 }, { "epoch": 10.800691244239632, "grad_norm": 1.4326667785644531, "learning_rate": 9.108442211055278e-06, "loss": 0.0204, "step": 9375 }, { "epoch": 10.829493087557603, "grad_norm": 1.4043165445327759, "learning_rate": 9.105929648241206e-06, "loss": 0.0226, "step": 9400 }, { "epoch": 10.858294930875577, "grad_norm": 1.4871457815170288, "learning_rate": 9.1035175879397e-06, "loss": 0.0193, "step": 9425 }, { "epoch": 10.887096774193548, "grad_norm": 1.765453577041626, "learning_rate": 9.101005025125628e-06, "loss": 0.0227, "step": 9450 }, { "epoch": 10.915898617511521, "grad_norm": 1.6091750860214233, "learning_rate": 9.09849246231156e-06, "loss": 0.0213, "step": 9475 }, { "epoch": 10.944700460829493, "grad_norm": 20.02227020263672, "learning_rate": 9.095979899497489e-06, "loss": 0.0225, "step": 9500 }, { "epoch": 10.973502304147466, "grad_norm": 1.7145785093307495, "learning_rate": 9.093467336683418e-06, "loss": 0.0226, "step": 9525 }, { "epoch": 11.002304147465438, "grad_norm": 6.202127456665039, "learning_rate": 9.090954773869347e-06, "loss": 0.0218, "step": 9550 }, { "epoch": 11.03110599078341, "grad_norm": 1.2881903648376465, "learning_rate": 9.088442211055277e-06, "loss": 0.0139, "step": 9575 }, { "epoch": 11.059907834101383, "grad_norm": 1.4447721242904663, "learning_rate": 9.085929648241206e-06, "loss": 0.0134, "step": 9600 }, { "epoch": 11.088709677419354, "grad_norm": 1.3663309812545776, "learning_rate": 9.083417085427137e-06, "loss": 0.0141, "step": 9625 }, { "epoch": 11.117511520737327, "grad_norm": 1.1117240190505981, "learning_rate": 9.080904522613066e-06, "loss": 0.0142, "step": 9650 }, { "epoch": 11.146313364055299, "grad_norm": 1.839056372642517, "learning_rate": 9.078391959798996e-06, "loss": 0.0144, "step": 9675 }, { "epoch": 11.175115207373272, "grad_norm": 1.192344307899475, "learning_rate": 9.075879396984927e-06, "loss": 0.0152, "step": 9700 }, { "epoch": 11.203917050691244, "grad_norm": 0.7441015243530273, "learning_rate": 9.073366834170854e-06, "loss": 0.015, "step": 9725 }, { "epoch": 11.232718894009217, "grad_norm": 1.0923099517822266, "learning_rate": 9.070854271356785e-06, "loss": 0.0153, "step": 9750 }, { "epoch": 11.261520737327189, "grad_norm": 2.230032444000244, "learning_rate": 9.068341708542715e-06, "loss": 0.0149, "step": 9775 }, { "epoch": 11.290322580645162, "grad_norm": 1.134871482849121, "learning_rate": 9.065829145728644e-06, "loss": 0.0139, "step": 9800 }, { "epoch": 11.319124423963133, "grad_norm": 1.4531220197677612, "learning_rate": 9.063316582914573e-06, "loss": 0.0153, "step": 9825 }, { "epoch": 11.347926267281107, "grad_norm": 4.734939098358154, "learning_rate": 9.060804020100502e-06, "loss": 0.0159, "step": 9850 }, { "epoch": 11.376728110599078, "grad_norm": 4.08478307723999, "learning_rate": 9.058291457286433e-06, "loss": 0.0143, "step": 9875 }, { "epoch": 11.40552995391705, "grad_norm": 1.9807573556900024, "learning_rate": 9.055778894472363e-06, "loss": 0.015, "step": 9900 }, { "epoch": 11.434331797235023, "grad_norm": 1.3398187160491943, "learning_rate": 9.053266331658292e-06, "loss": 0.0148, "step": 9925 }, { "epoch": 11.463133640552995, "grad_norm": 1.698241949081421, "learning_rate": 9.050753768844221e-06, "loss": 0.0159, "step": 9950 }, { "epoch": 11.491935483870968, "grad_norm": 1.9438401460647583, "learning_rate": 9.048241206030152e-06, "loss": 0.0164, "step": 9975 }, { "epoch": 11.52073732718894, "grad_norm": 3.160419464111328, "learning_rate": 9.04572864321608e-06, "loss": 0.0167, "step": 10000 }, { "epoch": 11.52073732718894, "eval_loss": 0.782230019569397, "eval_runtime": 81.9478, "eval_samples_per_second": 38.622, "eval_steps_per_second": 4.832, "eval_wer": 38.51229550015305, "step": 10000 }, { "epoch": 11.549539170506913, "grad_norm": 3.788954257965088, "learning_rate": 9.043216080402011e-06, "loss": 0.0151, "step": 10025 }, { "epoch": 11.578341013824884, "grad_norm": 1.382492184638977, "learning_rate": 9.04070351758794e-06, "loss": 0.0156, "step": 10050 }, { "epoch": 11.607142857142858, "grad_norm": 1.5896028280258179, "learning_rate": 9.03819095477387e-06, "loss": 0.0148, "step": 10075 }, { "epoch": 11.63594470046083, "grad_norm": 4.558351039886475, "learning_rate": 9.0356783919598e-06, "loss": 0.0153, "step": 10100 }, { "epoch": 11.664746543778802, "grad_norm": 1.8593884706497192, "learning_rate": 9.033165829145728e-06, "loss": 0.0153, "step": 10125 }, { "epoch": 11.693548387096774, "grad_norm": 1.8850159645080566, "learning_rate": 9.03065326633166e-06, "loss": 0.0154, "step": 10150 }, { "epoch": 11.722350230414747, "grad_norm": 1.3466520309448242, "learning_rate": 9.028140703517589e-06, "loss": 0.0162, "step": 10175 }, { "epoch": 11.751152073732719, "grad_norm": 2.5634868144989014, "learning_rate": 9.025628140703518e-06, "loss": 0.0182, "step": 10200 }, { "epoch": 11.77995391705069, "grad_norm": 1.5896390676498413, "learning_rate": 9.023115577889447e-06, "loss": 0.016, "step": 10225 }, { "epoch": 11.808755760368664, "grad_norm": 3.221112012863159, "learning_rate": 9.020603015075378e-06, "loss": 0.0149, "step": 10250 }, { "epoch": 11.837557603686635, "grad_norm": 2.469203233718872, "learning_rate": 9.018090452261308e-06, "loss": 0.0169, "step": 10275 }, { "epoch": 11.866359447004609, "grad_norm": 1.2563629150390625, "learning_rate": 9.015577889447237e-06, "loss": 0.0153, "step": 10300 }, { "epoch": 11.89516129032258, "grad_norm": 1.1661770343780518, "learning_rate": 9.013065326633166e-06, "loss": 0.0156, "step": 10325 }, { "epoch": 11.923963133640553, "grad_norm": 1.820522427558899, "learning_rate": 9.010552763819096e-06, "loss": 0.0153, "step": 10350 }, { "epoch": 11.952764976958525, "grad_norm": 1.6093761920928955, "learning_rate": 9.008040201005027e-06, "loss": 0.0168, "step": 10375 }, { "epoch": 11.981566820276498, "grad_norm": 1.1536988019943237, "learning_rate": 9.005527638190954e-06, "loss": 0.0156, "step": 10400 }, { "epoch": 12.01036866359447, "grad_norm": 2.4196181297302246, "learning_rate": 9.003015075376885e-06, "loss": 0.0131, "step": 10425 }, { "epoch": 12.039170506912443, "grad_norm": 1.315458059310913, "learning_rate": 9.000502512562815e-06, "loss": 0.011, "step": 10450 }, { "epoch": 12.067972350230415, "grad_norm": 1.2690773010253906, "learning_rate": 8.997989949748744e-06, "loss": 0.0102, "step": 10475 }, { "epoch": 12.096774193548388, "grad_norm": 1.1760327816009521, "learning_rate": 8.995477386934675e-06, "loss": 0.0103, "step": 10500 }, { "epoch": 12.12557603686636, "grad_norm": 0.9241305589675903, "learning_rate": 8.992964824120604e-06, "loss": 0.0104, "step": 10525 }, { "epoch": 12.154377880184331, "grad_norm": 1.5326272249221802, "learning_rate": 8.990452261306534e-06, "loss": 0.0107, "step": 10550 }, { "epoch": 12.183179723502304, "grad_norm": 2.916841745376587, "learning_rate": 8.987939698492463e-06, "loss": 0.0107, "step": 10575 }, { "epoch": 12.211981566820276, "grad_norm": 0.9105071425437927, "learning_rate": 8.985427135678392e-06, "loss": 0.0102, "step": 10600 }, { "epoch": 12.240783410138249, "grad_norm": 1.3011952638626099, "learning_rate": 8.982914572864322e-06, "loss": 0.0106, "step": 10625 }, { "epoch": 12.26958525345622, "grad_norm": 0.6928938627243042, "learning_rate": 8.980402010050253e-06, "loss": 0.0112, "step": 10650 }, { "epoch": 12.298387096774194, "grad_norm": 0.8029574751853943, "learning_rate": 8.977889447236182e-06, "loss": 0.011, "step": 10675 }, { "epoch": 12.327188940092165, "grad_norm": 1.112497091293335, "learning_rate": 8.975376884422111e-06, "loss": 0.0103, "step": 10700 }, { "epoch": 12.355990783410139, "grad_norm": 0.7196361422538757, "learning_rate": 8.97286432160804e-06, "loss": 0.0109, "step": 10725 }, { "epoch": 12.38479262672811, "grad_norm": 1.0737648010253906, "learning_rate": 8.97035175879397e-06, "loss": 0.0103, "step": 10750 }, { "epoch": 12.413594470046084, "grad_norm": 1.8710594177246094, "learning_rate": 8.967839195979901e-06, "loss": 0.0107, "step": 10775 }, { "epoch": 12.442396313364055, "grad_norm": 3.5749380588531494, "learning_rate": 8.96532663316583e-06, "loss": 0.012, "step": 10800 }, { "epoch": 12.471198156682028, "grad_norm": 1.853403925895691, "learning_rate": 8.96281407035176e-06, "loss": 0.0104, "step": 10825 }, { "epoch": 12.5, "grad_norm": 1.2189617156982422, "learning_rate": 8.960301507537689e-06, "loss": 0.0109, "step": 10850 }, { "epoch": 12.528801843317972, "grad_norm": 2.536036491394043, "learning_rate": 8.957788944723618e-06, "loss": 0.0112, "step": 10875 }, { "epoch": 12.557603686635945, "grad_norm": 0.9198225140571594, "learning_rate": 8.95527638190955e-06, "loss": 0.0112, "step": 10900 }, { "epoch": 12.586405529953916, "grad_norm": 1.0974763631820679, "learning_rate": 8.952763819095479e-06, "loss": 0.011, "step": 10925 }, { "epoch": 12.61520737327189, "grad_norm": 1.8058916330337524, "learning_rate": 8.950251256281408e-06, "loss": 0.0118, "step": 10950 }, { "epoch": 12.644009216589861, "grad_norm": 1.5306389331817627, "learning_rate": 8.947738693467337e-06, "loss": 0.0114, "step": 10975 }, { "epoch": 12.672811059907835, "grad_norm": 1.6134413480758667, "learning_rate": 8.945226130653267e-06, "loss": 0.0118, "step": 11000 }, { "epoch": 12.672811059907835, "eval_loss": 0.8012255430221558, "eval_runtime": 82.1822, "eval_samples_per_second": 38.512, "eval_steps_per_second": 4.819, "eval_wer": 38.593925376687864, "step": 11000 }, { "epoch": 12.701612903225806, "grad_norm": 1.0030903816223145, "learning_rate": 8.942713567839196e-06, "loss": 0.0113, "step": 11025 }, { "epoch": 12.73041474654378, "grad_norm": 1.451919674873352, "learning_rate": 8.940201005025127e-06, "loss": 0.0119, "step": 11050 }, { "epoch": 12.759216589861751, "grad_norm": 1.2492125034332275, "learning_rate": 8.937688442211056e-06, "loss": 0.0118, "step": 11075 }, { "epoch": 12.788018433179724, "grad_norm": 0.9921176433563232, "learning_rate": 8.935175879396986e-06, "loss": 0.011, "step": 11100 }, { "epoch": 12.816820276497696, "grad_norm": 1.3765182495117188, "learning_rate": 8.932663316582915e-06, "loss": 0.0119, "step": 11125 }, { "epoch": 12.845622119815669, "grad_norm": 2.316406726837158, "learning_rate": 8.930150753768844e-06, "loss": 0.0116, "step": 11150 }, { "epoch": 12.87442396313364, "grad_norm": 2.1885578632354736, "learning_rate": 8.927638190954775e-06, "loss": 0.0116, "step": 11175 }, { "epoch": 12.903225806451612, "grad_norm": 1.870403528213501, "learning_rate": 8.925125628140705e-06, "loss": 0.0116, "step": 11200 }, { "epoch": 12.932027649769585, "grad_norm": 1.4219310283660889, "learning_rate": 8.922613065326634e-06, "loss": 0.0121, "step": 11225 }, { "epoch": 12.960829493087557, "grad_norm": 1.6857144832611084, "learning_rate": 8.920100502512563e-06, "loss": 0.0117, "step": 11250 }, { "epoch": 12.98963133640553, "grad_norm": 1.1095285415649414, "learning_rate": 8.917587939698493e-06, "loss": 0.0129, "step": 11275 }, { "epoch": 13.018433179723502, "grad_norm": 1.647140383720398, "learning_rate": 8.915075376884424e-06, "loss": 0.009, "step": 11300 }, { "epoch": 13.047235023041475, "grad_norm": 1.279297947883606, "learning_rate": 8.912562814070353e-06, "loss": 0.0084, "step": 11325 }, { "epoch": 13.076036866359447, "grad_norm": 0.6342019438743591, "learning_rate": 8.910050251256282e-06, "loss": 0.0077, "step": 11350 }, { "epoch": 13.10483870967742, "grad_norm": 0.7389745712280273, "learning_rate": 8.907537688442212e-06, "loss": 0.0073, "step": 11375 }, { "epoch": 13.133640552995391, "grad_norm": 0.9078971743583679, "learning_rate": 8.905025125628143e-06, "loss": 0.0076, "step": 11400 }, { "epoch": 13.162442396313365, "grad_norm": 2.7326436042785645, "learning_rate": 8.90251256281407e-06, "loss": 0.008, "step": 11425 }, { "epoch": 13.191244239631336, "grad_norm": 0.8379087448120117, "learning_rate": 8.900000000000001e-06, "loss": 0.0081, "step": 11450 }, { "epoch": 13.22004608294931, "grad_norm": 0.7725964188575745, "learning_rate": 8.89748743718593e-06, "loss": 0.0076, "step": 11475 }, { "epoch": 13.248847926267281, "grad_norm": 0.6872130632400513, "learning_rate": 8.89497487437186e-06, "loss": 0.0077, "step": 11500 }, { "epoch": 13.277649769585253, "grad_norm": 1.288282036781311, "learning_rate": 8.892462311557791e-06, "loss": 0.0077, "step": 11525 }, { "epoch": 13.306451612903226, "grad_norm": 0.5300800204277039, "learning_rate": 8.889949748743718e-06, "loss": 0.0078, "step": 11550 }, { "epoch": 13.335253456221198, "grad_norm": 0.8753048777580261, "learning_rate": 8.88743718592965e-06, "loss": 0.0077, "step": 11575 }, { "epoch": 13.36405529953917, "grad_norm": 1.328315019607544, "learning_rate": 8.884924623115579e-06, "loss": 0.0076, "step": 11600 }, { "epoch": 13.392857142857142, "grad_norm": 0.6952997446060181, "learning_rate": 8.882412060301508e-06, "loss": 0.0081, "step": 11625 }, { "epoch": 13.421658986175116, "grad_norm": 0.8020520210266113, "learning_rate": 8.879899497487437e-06, "loss": 0.0073, "step": 11650 }, { "epoch": 13.450460829493087, "grad_norm": 0.6021475195884705, "learning_rate": 8.877386934673368e-06, "loss": 0.0075, "step": 11675 }, { "epoch": 13.47926267281106, "grad_norm": 0.6865134239196777, "learning_rate": 8.874874371859296e-06, "loss": 0.009, "step": 11700 }, { "epoch": 13.508064516129032, "grad_norm": 0.7802244424819946, "learning_rate": 8.872361809045227e-06, "loss": 0.0089, "step": 11725 }, { "epoch": 13.536866359447005, "grad_norm": 0.8716938495635986, "learning_rate": 8.869849246231156e-06, "loss": 0.0085, "step": 11750 }, { "epoch": 13.565668202764977, "grad_norm": 1.4313287734985352, "learning_rate": 8.867336683417086e-06, "loss": 0.0085, "step": 11775 }, { "epoch": 13.59447004608295, "grad_norm": 2.2919585704803467, "learning_rate": 8.864824120603017e-06, "loss": 0.0087, "step": 11800 }, { "epoch": 13.623271889400922, "grad_norm": 0.7238638997077942, "learning_rate": 8.862311557788944e-06, "loss": 0.0085, "step": 11825 }, { "epoch": 13.652073732718893, "grad_norm": 0.5909668207168579, "learning_rate": 8.859798994974875e-06, "loss": 0.0086, "step": 11850 }, { "epoch": 13.680875576036867, "grad_norm": 1.3301033973693848, "learning_rate": 8.857286432160805e-06, "loss": 0.0083, "step": 11875 }, { "epoch": 13.709677419354838, "grad_norm": 0.5700967907905579, "learning_rate": 8.854773869346734e-06, "loss": 0.009, "step": 11900 }, { "epoch": 13.738479262672811, "grad_norm": 0.6307144165039062, "learning_rate": 8.852261306532665e-06, "loss": 0.0088, "step": 11925 }, { "epoch": 13.767281105990783, "grad_norm": 1.0527507066726685, "learning_rate": 8.849748743718594e-06, "loss": 0.0084, "step": 11950 }, { "epoch": 13.796082949308756, "grad_norm": 0.7778949737548828, "learning_rate": 8.847236180904524e-06, "loss": 0.0095, "step": 11975 }, { "epoch": 13.824884792626728, "grad_norm": 0.6615479588508606, "learning_rate": 8.844723618090453e-06, "loss": 0.0086, "step": 12000 }, { "epoch": 13.824884792626728, "eval_loss": 0.8289599418640137, "eval_runtime": 83.8714, "eval_samples_per_second": 37.736, "eval_steps_per_second": 4.722, "eval_wer": 38.70276521206762, "step": 12000 }, { "epoch": 13.853686635944701, "grad_norm": 1.8682621717453003, "learning_rate": 8.842211055276382e-06, "loss": 0.0091, "step": 12025 }, { "epoch": 13.882488479262673, "grad_norm": 0.9698368310928345, "learning_rate": 8.839698492462312e-06, "loss": 0.0082, "step": 12050 }, { "epoch": 13.911290322580646, "grad_norm": 1.0168646574020386, "learning_rate": 8.837185929648243e-06, "loss": 0.0086, "step": 12075 }, { "epoch": 13.940092165898617, "grad_norm": 4.033752918243408, "learning_rate": 8.83467336683417e-06, "loss": 0.0087, "step": 12100 }, { "epoch": 13.96889400921659, "grad_norm": 2.0520637035369873, "learning_rate": 8.832160804020101e-06, "loss": 0.01, "step": 12125 }, { "epoch": 13.997695852534562, "grad_norm": 1.1656450033187866, "learning_rate": 8.82964824120603e-06, "loss": 0.0091, "step": 12150 }, { "epoch": 14.026497695852534, "grad_norm": 0.48660245537757874, "learning_rate": 8.82713567839196e-06, "loss": 0.0066, "step": 12175 }, { "epoch": 14.055299539170507, "grad_norm": 0.5222777724266052, "learning_rate": 8.824623115577891e-06, "loss": 0.0069, "step": 12200 }, { "epoch": 14.084101382488479, "grad_norm": 0.6771835088729858, "learning_rate": 8.82211055276382e-06, "loss": 0.0062, "step": 12225 }, { "epoch": 14.112903225806452, "grad_norm": 0.46743452548980713, "learning_rate": 8.81959798994975e-06, "loss": 0.0066, "step": 12250 }, { "epoch": 14.141705069124423, "grad_norm": 0.504743218421936, "learning_rate": 8.817085427135679e-06, "loss": 0.0059, "step": 12275 }, { "epoch": 14.170506912442397, "grad_norm": 0.4450019598007202, "learning_rate": 8.814572864321608e-06, "loss": 0.0059, "step": 12300 }, { "epoch": 14.199308755760368, "grad_norm": 1.0690401792526245, "learning_rate": 8.812060301507538e-06, "loss": 0.0065, "step": 12325 }, { "epoch": 14.228110599078342, "grad_norm": 0.47922977805137634, "learning_rate": 8.809547738693469e-06, "loss": 0.0059, "step": 12350 }, { "epoch": 14.256912442396313, "grad_norm": 1.295624017715454, "learning_rate": 8.807035175879398e-06, "loss": 0.0061, "step": 12375 }, { "epoch": 14.285714285714286, "grad_norm": 0.4133555293083191, "learning_rate": 8.804522613065327e-06, "loss": 0.0067, "step": 12400 }, { "epoch": 14.314516129032258, "grad_norm": 0.42622822523117065, "learning_rate": 8.802010050251257e-06, "loss": 0.0062, "step": 12425 }, { "epoch": 14.343317972350231, "grad_norm": 0.6265550851821899, "learning_rate": 8.799497487437186e-06, "loss": 0.0058, "step": 12450 }, { "epoch": 14.372119815668203, "grad_norm": 0.36786454916000366, "learning_rate": 8.796984924623117e-06, "loss": 0.0062, "step": 12475 }, { "epoch": 14.400921658986174, "grad_norm": 0.5231978893280029, "learning_rate": 8.794472361809046e-06, "loss": 0.0062, "step": 12500 }, { "epoch": 14.429723502304148, "grad_norm": 0.5160797238349915, "learning_rate": 8.791959798994976e-06, "loss": 0.0063, "step": 12525 }, { "epoch": 14.45852534562212, "grad_norm": 0.7744293808937073, "learning_rate": 8.789447236180905e-06, "loss": 0.0064, "step": 12550 }, { "epoch": 14.487327188940093, "grad_norm": 0.7569948434829712, "learning_rate": 8.786934673366834e-06, "loss": 0.0068, "step": 12575 }, { "epoch": 14.516129032258064, "grad_norm": 0.8720670938491821, "learning_rate": 8.784422110552765e-06, "loss": 0.0062, "step": 12600 }, { "epoch": 14.544930875576037, "grad_norm": 0.5247597098350525, "learning_rate": 8.781909547738695e-06, "loss": 0.0064, "step": 12625 }, { "epoch": 14.573732718894009, "grad_norm": 0.7238091230392456, "learning_rate": 8.779396984924624e-06, "loss": 0.006, "step": 12650 }, { "epoch": 14.602534562211982, "grad_norm": 0.5899724960327148, "learning_rate": 8.776884422110553e-06, "loss": 0.006, "step": 12675 }, { "epoch": 14.631336405529954, "grad_norm": 1.04344642162323, "learning_rate": 8.774371859296483e-06, "loss": 0.0061, "step": 12700 }, { "epoch": 14.660138248847927, "grad_norm": 2.1796579360961914, "learning_rate": 8.771859296482412e-06, "loss": 0.0068, "step": 12725 }, { "epoch": 14.688940092165899, "grad_norm": 4.752668380737305, "learning_rate": 8.769346733668343e-06, "loss": 0.0075, "step": 12750 }, { "epoch": 14.717741935483872, "grad_norm": 0.5938330888748169, "learning_rate": 8.766834170854272e-06, "loss": 0.0065, "step": 12775 }, { "epoch": 14.746543778801843, "grad_norm": 0.7109923362731934, "learning_rate": 8.764321608040202e-06, "loss": 0.0068, "step": 12800 }, { "epoch": 14.775345622119815, "grad_norm": 2.9712648391723633, "learning_rate": 8.761809045226131e-06, "loss": 0.0069, "step": 12825 }, { "epoch": 14.804147465437788, "grad_norm": 0.4363464117050171, "learning_rate": 8.75929648241206e-06, "loss": 0.0067, "step": 12850 }, { "epoch": 14.83294930875576, "grad_norm": 0.5448355674743652, "learning_rate": 8.756783919597991e-06, "loss": 0.0081, "step": 12875 }, { "epoch": 14.861751152073733, "grad_norm": 1.0198894739151, "learning_rate": 8.75427135678392e-06, "loss": 0.0074, "step": 12900 }, { "epoch": 14.890552995391705, "grad_norm": 0.624271810054779, "learning_rate": 8.75175879396985e-06, "loss": 0.0072, "step": 12925 }, { "epoch": 14.919354838709678, "grad_norm": 0.6458427309989929, "learning_rate": 8.74924623115578e-06, "loss": 0.0073, "step": 12950 }, { "epoch": 14.94815668202765, "grad_norm": 3.247443675994873, "learning_rate": 8.746733668341709e-06, "loss": 0.0072, "step": 12975 }, { "epoch": 14.976958525345623, "grad_norm": 0.9023982882499695, "learning_rate": 8.74422110552764e-06, "loss": 0.0075, "step": 13000 }, { "epoch": 14.976958525345623, "eval_loss": 0.8399462103843689, "eval_runtime": 84.3288, "eval_samples_per_second": 37.532, "eval_steps_per_second": 4.696, "eval_wer": 38.284412094826706, "step": 13000 }, { "epoch": 15.005760368663594, "grad_norm": 0.58636474609375, "learning_rate": 8.741708542713569e-06, "loss": 0.0071, "step": 13025 }, { "epoch": 15.034562211981568, "grad_norm": 1.758497953414917, "learning_rate": 8.739195979899498e-06, "loss": 0.0049, "step": 13050 }, { "epoch": 15.06336405529954, "grad_norm": 1.5188539028167725, "learning_rate": 8.736683417085428e-06, "loss": 0.0056, "step": 13075 }, { "epoch": 15.09216589861751, "grad_norm": 0.5889074206352234, "learning_rate": 8.734170854271357e-06, "loss": 0.0068, "step": 13100 }, { "epoch": 15.120967741935484, "grad_norm": 0.7042338252067566, "learning_rate": 8.731658291457286e-06, "loss": 0.0059, "step": 13125 }, { "epoch": 15.149769585253456, "grad_norm": 0.37950950860977173, "learning_rate": 8.729145728643217e-06, "loss": 0.0052, "step": 13150 }, { "epoch": 15.178571428571429, "grad_norm": 0.37451037764549255, "learning_rate": 8.726633165829147e-06, "loss": 0.0054, "step": 13175 }, { "epoch": 15.2073732718894, "grad_norm": 0.5078937411308289, "learning_rate": 8.724120603015076e-06, "loss": 0.0053, "step": 13200 }, { "epoch": 15.236175115207374, "grad_norm": 0.41019222140312195, "learning_rate": 8.721608040201007e-06, "loss": 0.0054, "step": 13225 }, { "epoch": 15.264976958525345, "grad_norm": 0.44372081756591797, "learning_rate": 8.719095477386934e-06, "loss": 0.0057, "step": 13250 }, { "epoch": 15.293778801843319, "grad_norm": 2.010026216506958, "learning_rate": 8.716582914572866e-06, "loss": 0.0048, "step": 13275 }, { "epoch": 15.32258064516129, "grad_norm": 0.5479776859283447, "learning_rate": 8.714070351758795e-06, "loss": 0.0061, "step": 13300 }, { "epoch": 15.351382488479263, "grad_norm": 0.5027567148208618, "learning_rate": 8.711557788944724e-06, "loss": 0.0063, "step": 13325 }, { "epoch": 15.380184331797235, "grad_norm": 1.5200526714324951, "learning_rate": 8.709045226130653e-06, "loss": 0.0058, "step": 13350 }, { "epoch": 15.408986175115208, "grad_norm": 0.3625454902648926, "learning_rate": 8.706532663316584e-06, "loss": 0.005, "step": 13375 }, { "epoch": 15.43778801843318, "grad_norm": 1.2429091930389404, "learning_rate": 8.704020100502514e-06, "loss": 0.0052, "step": 13400 }, { "epoch": 15.466589861751151, "grad_norm": 0.42101046442985535, "learning_rate": 8.701507537688443e-06, "loss": 0.0051, "step": 13425 }, { "epoch": 15.495391705069125, "grad_norm": 1.5347704887390137, "learning_rate": 8.698994974874372e-06, "loss": 0.0064, "step": 13450 }, { "epoch": 15.524193548387096, "grad_norm": 0.7647246718406677, "learning_rate": 8.696482412060302e-06, "loss": 0.0053, "step": 13475 }, { "epoch": 15.55299539170507, "grad_norm": 0.36360058188438416, "learning_rate": 8.693969849246233e-06, "loss": 0.0049, "step": 13500 }, { "epoch": 15.581797235023041, "grad_norm": 0.5429810881614685, "learning_rate": 8.69145728643216e-06, "loss": 0.0055, "step": 13525 }, { "epoch": 15.610599078341014, "grad_norm": 0.4658704698085785, "learning_rate": 8.688944723618091e-06, "loss": 0.0057, "step": 13550 }, { "epoch": 15.639400921658986, "grad_norm": 0.7398175001144409, "learning_rate": 8.68643216080402e-06, "loss": 0.0057, "step": 13575 }, { "epoch": 15.668202764976959, "grad_norm": 1.4540014266967773, "learning_rate": 8.68391959798995e-06, "loss": 0.0061, "step": 13600 }, { "epoch": 15.69700460829493, "grad_norm": 1.667484164237976, "learning_rate": 8.681407035175881e-06, "loss": 0.0057, "step": 13625 }, { "epoch": 15.725806451612904, "grad_norm": 0.46649298071861267, "learning_rate": 8.67889447236181e-06, "loss": 0.0057, "step": 13650 }, { "epoch": 15.754608294930875, "grad_norm": 5.5160136222839355, "learning_rate": 8.676482412060302e-06, "loss": 0.0059, "step": 13675 }, { "epoch": 15.783410138248849, "grad_norm": 0.8757949471473694, "learning_rate": 8.673969849246231e-06, "loss": 0.0055, "step": 13700 }, { "epoch": 15.81221198156682, "grad_norm": 0.755118191242218, "learning_rate": 8.67145728643216e-06, "loss": 0.0061, "step": 13725 }, { "epoch": 15.841013824884792, "grad_norm": 1.0177992582321167, "learning_rate": 8.668944723618092e-06, "loss": 0.0058, "step": 13750 }, { "epoch": 15.869815668202765, "grad_norm": 1.1667072772979736, "learning_rate": 8.666432160804021e-06, "loss": 0.0061, "step": 13775 }, { "epoch": 15.898617511520737, "grad_norm": 1.4741710424423218, "learning_rate": 8.664020100502514e-06, "loss": 0.0063, "step": 13800 }, { "epoch": 15.92741935483871, "grad_norm": 4.335197925567627, "learning_rate": 8.661507537688444e-06, "loss": 0.0067, "step": 13825 }, { "epoch": 15.956221198156681, "grad_norm": 0.7815756797790527, "learning_rate": 8.658994974874373e-06, "loss": 0.0063, "step": 13850 }, { "epoch": 15.985023041474655, "grad_norm": 0.649519145488739, "learning_rate": 8.656482412060302e-06, "loss": 0.006, "step": 13875 }, { "epoch": 16.013824884792626, "grad_norm": 0.45130935311317444, "learning_rate": 8.653969849246231e-06, "loss": 0.0057, "step": 13900 }, { "epoch": 16.042626728110598, "grad_norm": 1.6727375984191895, "learning_rate": 8.65145728643216e-06, "loss": 0.0044, "step": 13925 }, { "epoch": 16.071428571428573, "grad_norm": 0.289116233587265, "learning_rate": 8.648944723618092e-06, "loss": 0.0048, "step": 13950 }, { "epoch": 16.100230414746544, "grad_norm": 0.5323686599731445, "learning_rate": 8.64643216080402e-06, "loss": 0.0044, "step": 13975 }, { "epoch": 16.129032258064516, "grad_norm": 0.35244685411453247, "learning_rate": 8.64391959798995e-06, "loss": 0.0049, "step": 14000 }, { "epoch": 16.129032258064516, "eval_loss": 0.8589875102043152, "eval_runtime": 85.0214, "eval_samples_per_second": 37.226, "eval_steps_per_second": 4.658, "eval_wer": 38.36604197136152, "step": 14000 }, { "epoch": 16.157834101382488, "grad_norm": 0.7141326665878296, "learning_rate": 8.64140703517588e-06, "loss": 0.0041, "step": 14025 }, { "epoch": 16.18663594470046, "grad_norm": 0.8862543702125549, "learning_rate": 8.638894472361809e-06, "loss": 0.0038, "step": 14050 }, { "epoch": 16.215437788018434, "grad_norm": 0.3841165602207184, "learning_rate": 8.63638190954774e-06, "loss": 0.0038, "step": 14075 }, { "epoch": 16.244239631336406, "grad_norm": 3.0119380950927734, "learning_rate": 8.63386934673367e-06, "loss": 0.0038, "step": 14100 }, { "epoch": 16.273041474654377, "grad_norm": 0.8715463280677795, "learning_rate": 8.631356783919599e-06, "loss": 0.0039, "step": 14125 }, { "epoch": 16.30184331797235, "grad_norm": 0.42746609449386597, "learning_rate": 8.628844221105528e-06, "loss": 0.0044, "step": 14150 }, { "epoch": 16.330645161290324, "grad_norm": 1.2124598026275635, "learning_rate": 8.626331658291457e-06, "loss": 0.0045, "step": 14175 }, { "epoch": 16.359447004608295, "grad_norm": 11.402376174926758, "learning_rate": 8.623819095477388e-06, "loss": 0.0048, "step": 14200 }, { "epoch": 16.388248847926267, "grad_norm": 0.5969913005828857, "learning_rate": 8.621306532663318e-06, "loss": 0.0041, "step": 14225 }, { "epoch": 16.41705069124424, "grad_norm": 1.3652573823928833, "learning_rate": 8.618793969849247e-06, "loss": 0.004, "step": 14250 }, { "epoch": 16.445852534562214, "grad_norm": 0.7264500260353088, "learning_rate": 8.616281407035176e-06, "loss": 0.0043, "step": 14275 }, { "epoch": 16.474654377880185, "grad_norm": 0.3619191348552704, "learning_rate": 8.613768844221106e-06, "loss": 0.0041, "step": 14300 }, { "epoch": 16.503456221198157, "grad_norm": 0.3303234875202179, "learning_rate": 8.611256281407035e-06, "loss": 0.0049, "step": 14325 }, { "epoch": 16.532258064516128, "grad_norm": 0.5543037056922913, "learning_rate": 8.608743718592966e-06, "loss": 0.0041, "step": 14350 }, { "epoch": 16.5610599078341, "grad_norm": 0.9183310866355896, "learning_rate": 8.606231155778895e-06, "loss": 0.0044, "step": 14375 }, { "epoch": 16.589861751152075, "grad_norm": 0.5069083571434021, "learning_rate": 8.603718592964825e-06, "loss": 0.005, "step": 14400 }, { "epoch": 16.618663594470046, "grad_norm": 1.362262487411499, "learning_rate": 8.601206030150756e-06, "loss": 0.0048, "step": 14425 }, { "epoch": 16.647465437788018, "grad_norm": 0.5827749371528625, "learning_rate": 8.598693467336683e-06, "loss": 0.0043, "step": 14450 }, { "epoch": 16.67626728110599, "grad_norm": 0.8752914071083069, "learning_rate": 8.596180904522614e-06, "loss": 0.0064, "step": 14475 }, { "epoch": 16.705069124423964, "grad_norm": 0.8069778084754944, "learning_rate": 8.593668341708544e-06, "loss": 0.0046, "step": 14500 }, { "epoch": 16.733870967741936, "grad_norm": 0.5271980166435242, "learning_rate": 8.591155778894473e-06, "loss": 0.0044, "step": 14525 }, { "epoch": 16.762672811059907, "grad_norm": 0.7341033816337585, "learning_rate": 8.588643216080402e-06, "loss": 0.0048, "step": 14550 }, { "epoch": 16.79147465437788, "grad_norm": 0.552815318107605, "learning_rate": 8.586130653266332e-06, "loss": 0.0051, "step": 14575 }, { "epoch": 16.820276497695854, "grad_norm": 1.399330735206604, "learning_rate": 8.583618090452261e-06, "loss": 0.0051, "step": 14600 }, { "epoch": 16.849078341013826, "grad_norm": 0.6628991365432739, "learning_rate": 8.581105527638192e-06, "loss": 0.0055, "step": 14625 }, { "epoch": 16.877880184331797, "grad_norm": 4.09703254699707, "learning_rate": 8.578592964824121e-06, "loss": 0.0064, "step": 14650 }, { "epoch": 16.90668202764977, "grad_norm": 1.416549801826477, "learning_rate": 8.57608040201005e-06, "loss": 0.0057, "step": 14675 }, { "epoch": 16.93548387096774, "grad_norm": 0.7540784478187561, "learning_rate": 8.573567839195982e-06, "loss": 0.0065, "step": 14700 }, { "epoch": 16.964285714285715, "grad_norm": 0.5062981843948364, "learning_rate": 8.57105527638191e-06, "loss": 0.0054, "step": 14725 }, { "epoch": 16.993087557603687, "grad_norm": 1.6211462020874023, "learning_rate": 8.56854271356784e-06, "loss": 0.0054, "step": 14750 }, { "epoch": 17.02188940092166, "grad_norm": 0.3264090120792389, "learning_rate": 8.56603015075377e-06, "loss": 0.0042, "step": 14775 }, { "epoch": 17.05069124423963, "grad_norm": 1.5885615348815918, "learning_rate": 8.563517587939699e-06, "loss": 0.0036, "step": 14800 }, { "epoch": 17.079493087557605, "grad_norm": 1.1251944303512573, "learning_rate": 8.56100502512563e-06, "loss": 0.0035, "step": 14825 }, { "epoch": 17.108294930875577, "grad_norm": 0.40826329588890076, "learning_rate": 8.558492462311558e-06, "loss": 0.0035, "step": 14850 }, { "epoch": 17.137096774193548, "grad_norm": 0.4759327471256256, "learning_rate": 8.555979899497489e-06, "loss": 0.0032, "step": 14875 }, { "epoch": 17.16589861751152, "grad_norm": 0.472869336605072, "learning_rate": 8.553467336683418e-06, "loss": 0.0038, "step": 14900 }, { "epoch": 17.194700460829495, "grad_norm": 0.27257394790649414, "learning_rate": 8.550954773869347e-06, "loss": 0.0035, "step": 14925 }, { "epoch": 17.223502304147466, "grad_norm": 0.3847483992576599, "learning_rate": 8.548442211055277e-06, "loss": 0.0035, "step": 14950 }, { "epoch": 17.252304147465438, "grad_norm": 0.4849775731563568, "learning_rate": 8.545929648241208e-06, "loss": 0.0035, "step": 14975 }, { "epoch": 17.28110599078341, "grad_norm": 0.5655560493469238, "learning_rate": 8.543417085427135e-06, "loss": 0.0052, "step": 15000 }, { "epoch": 17.28110599078341, "eval_loss": 0.8801432251930237, "eval_runtime": 85.5202, "eval_samples_per_second": 37.009, "eval_steps_per_second": 4.63, "eval_wer": 38.50209176558621, "step": 15000 }, { "epoch": 17.30990783410138, "grad_norm": 0.39433902502059937, "learning_rate": 8.540904522613066e-06, "loss": 0.0043, "step": 15025 }, { "epoch": 17.338709677419356, "grad_norm": 0.7080853581428528, "learning_rate": 8.538391959798996e-06, "loss": 0.0035, "step": 15050 }, { "epoch": 17.367511520737327, "grad_norm": 0.5240097641944885, "learning_rate": 8.535879396984925e-06, "loss": 0.0041, "step": 15075 }, { "epoch": 17.3963133640553, "grad_norm": 2.0834262371063232, "learning_rate": 8.533366834170856e-06, "loss": 0.0052, "step": 15100 }, { "epoch": 17.42511520737327, "grad_norm": 1.0556303262710571, "learning_rate": 8.530854271356784e-06, "loss": 0.0043, "step": 15125 }, { "epoch": 17.453917050691246, "grad_norm": 2.5143723487854004, "learning_rate": 8.528341708542715e-06, "loss": 0.005, "step": 15150 }, { "epoch": 17.482718894009217, "grad_norm": 0.5352573394775391, "learning_rate": 8.525829145728644e-06, "loss": 0.0045, "step": 15175 }, { "epoch": 17.51152073732719, "grad_norm": 0.43107712268829346, "learning_rate": 8.523316582914573e-06, "loss": 0.0044, "step": 15200 }, { "epoch": 17.54032258064516, "grad_norm": 2.800713062286377, "learning_rate": 8.520804020100503e-06, "loss": 0.0043, "step": 15225 }, { "epoch": 17.569124423963135, "grad_norm": 1.9561121463775635, "learning_rate": 8.518291457286434e-06, "loss": 0.0052, "step": 15250 }, { "epoch": 17.597926267281107, "grad_norm": 0.6610175371170044, "learning_rate": 8.515778894472363e-06, "loss": 0.0043, "step": 15275 }, { "epoch": 17.62672811059908, "grad_norm": 0.6430454850196838, "learning_rate": 8.513266331658292e-06, "loss": 0.004, "step": 15300 }, { "epoch": 17.65552995391705, "grad_norm": 0.4515407979488373, "learning_rate": 8.510753768844222e-06, "loss": 0.0041, "step": 15325 }, { "epoch": 17.68433179723502, "grad_norm": 0.983609139919281, "learning_rate": 8.508241206030151e-06, "loss": 0.0048, "step": 15350 }, { "epoch": 17.713133640552996, "grad_norm": 0.5005194544792175, "learning_rate": 8.505728643216082e-06, "loss": 0.0043, "step": 15375 }, { "epoch": 17.741935483870968, "grad_norm": 0.6655073165893555, "learning_rate": 8.50321608040201e-06, "loss": 0.0048, "step": 15400 }, { "epoch": 17.77073732718894, "grad_norm": 3.0882365703582764, "learning_rate": 8.50070351758794e-06, "loss": 0.0044, "step": 15425 }, { "epoch": 17.79953917050691, "grad_norm": 0.9748306274414062, "learning_rate": 8.49819095477387e-06, "loss": 0.0058, "step": 15450 }, { "epoch": 17.828341013824886, "grad_norm": 3.1010043621063232, "learning_rate": 8.4956783919598e-06, "loss": 0.005, "step": 15475 }, { "epoch": 17.857142857142858, "grad_norm": 2.438321590423584, "learning_rate": 8.49316582914573e-06, "loss": 0.0048, "step": 15500 }, { "epoch": 17.88594470046083, "grad_norm": 0.656531572341919, "learning_rate": 8.49065326633166e-06, "loss": 0.0049, "step": 15525 }, { "epoch": 17.9147465437788, "grad_norm": 0.7923159599304199, "learning_rate": 8.488140703517589e-06, "loss": 0.005, "step": 15550 }, { "epoch": 17.943548387096776, "grad_norm": 1.1070263385772705, "learning_rate": 8.485628140703518e-06, "loss": 0.0058, "step": 15575 }, { "epoch": 17.972350230414747, "grad_norm": 0.7388762831687927, "learning_rate": 8.483115577889447e-06, "loss": 0.0045, "step": 15600 }, { "epoch": 18.00115207373272, "grad_norm": 0.8426020741462708, "learning_rate": 8.480603015075377e-06, "loss": 0.0045, "step": 15625 }, { "epoch": 18.02995391705069, "grad_norm": 0.4192512035369873, "learning_rate": 8.478090452261308e-06, "loss": 0.0038, "step": 15650 }, { "epoch": 18.058755760368662, "grad_norm": 2.2377736568450928, "learning_rate": 8.475577889447237e-06, "loss": 0.0037, "step": 15675 }, { "epoch": 18.087557603686637, "grad_norm": 1.6286989450454712, "learning_rate": 8.473065326633166e-06, "loss": 0.0037, "step": 15700 }, { "epoch": 18.11635944700461, "grad_norm": 0.34321683645248413, "learning_rate": 8.470552763819096e-06, "loss": 0.004, "step": 15725 }, { "epoch": 18.14516129032258, "grad_norm": 0.5919313430786133, "learning_rate": 8.468040201005025e-06, "loss": 0.0036, "step": 15750 }, { "epoch": 18.17396313364055, "grad_norm": 2.309155225753784, "learning_rate": 8.465527638190956e-06, "loss": 0.0038, "step": 15775 }, { "epoch": 18.202764976958527, "grad_norm": 2.1098005771636963, "learning_rate": 8.463015075376885e-06, "loss": 0.0039, "step": 15800 }, { "epoch": 18.231566820276498, "grad_norm": 1.0375442504882812, "learning_rate": 8.460502512562815e-06, "loss": 0.0038, "step": 15825 }, { "epoch": 18.26036866359447, "grad_norm": 2.8576114177703857, "learning_rate": 8.457989949748744e-06, "loss": 0.0043, "step": 15850 }, { "epoch": 18.28917050691244, "grad_norm": 0.5303353667259216, "learning_rate": 8.455477386934673e-06, "loss": 0.0035, "step": 15875 }, { "epoch": 18.317972350230416, "grad_norm": 0.6812912225723267, "learning_rate": 8.452964824120604e-06, "loss": 0.0034, "step": 15900 }, { "epoch": 18.346774193548388, "grad_norm": 0.4166198968887329, "learning_rate": 8.450452261306534e-06, "loss": 0.0033, "step": 15925 }, { "epoch": 18.37557603686636, "grad_norm": 1.5603909492492676, "learning_rate": 8.447939698492463e-06, "loss": 0.0045, "step": 15950 }, { "epoch": 18.40437788018433, "grad_norm": 0.45958149433135986, "learning_rate": 8.445427135678392e-06, "loss": 0.0033, "step": 15975 }, { "epoch": 18.433179723502302, "grad_norm": 0.5990963578224182, "learning_rate": 8.442914572864322e-06, "loss": 0.0039, "step": 16000 }, { "epoch": 18.433179723502302, "eval_loss": 0.8964568376541138, "eval_runtime": 84.5351, "eval_samples_per_second": 37.44, "eval_steps_per_second": 4.684, "eval_wer": 38.49869052073059, "step": 16000 }, { "epoch": 18.461981566820278, "grad_norm": 0.5313112735748291, "learning_rate": 8.440402010050251e-06, "loss": 0.0034, "step": 16025 }, { "epoch": 18.49078341013825, "grad_norm": 0.36253178119659424, "learning_rate": 8.437889447236182e-06, "loss": 0.0039, "step": 16050 }, { "epoch": 18.51958525345622, "grad_norm": 0.92385333776474, "learning_rate": 8.435376884422111e-06, "loss": 0.0035, "step": 16075 }, { "epoch": 18.548387096774192, "grad_norm": 0.7161293625831604, "learning_rate": 8.43286432160804e-06, "loss": 0.0036, "step": 16100 }, { "epoch": 18.577188940092167, "grad_norm": 0.2975520193576813, "learning_rate": 8.430351758793972e-06, "loss": 0.0043, "step": 16125 }, { "epoch": 18.60599078341014, "grad_norm": 0.9230226278305054, "learning_rate": 8.4278391959799e-06, "loss": 0.0035, "step": 16150 }, { "epoch": 18.63479262672811, "grad_norm": 0.31335213780403137, "learning_rate": 8.42532663316583e-06, "loss": 0.0039, "step": 16175 }, { "epoch": 18.663594470046082, "grad_norm": 0.4379565119743347, "learning_rate": 8.42281407035176e-06, "loss": 0.0038, "step": 16200 }, { "epoch": 18.692396313364057, "grad_norm": 0.6504258513450623, "learning_rate": 8.420301507537689e-06, "loss": 0.0048, "step": 16225 }, { "epoch": 18.72119815668203, "grad_norm": 2.0989575386047363, "learning_rate": 8.417788944723618e-06, "loss": 0.005, "step": 16250 }, { "epoch": 18.75, "grad_norm": 4.0336079597473145, "learning_rate": 8.415276381909548e-06, "loss": 0.0042, "step": 16275 }, { "epoch": 18.77880184331797, "grad_norm": 0.4784071445465088, "learning_rate": 8.412763819095479e-06, "loss": 0.0041, "step": 16300 }, { "epoch": 18.807603686635943, "grad_norm": 3.3699164390563965, "learning_rate": 8.410251256281408e-06, "loss": 0.0049, "step": 16325 }, { "epoch": 18.836405529953918, "grad_norm": 1.314875602722168, "learning_rate": 8.407738693467337e-06, "loss": 0.0048, "step": 16350 }, { "epoch": 18.86520737327189, "grad_norm": 0.8776557445526123, "learning_rate": 8.405226130653267e-06, "loss": 0.004, "step": 16375 }, { "epoch": 18.89400921658986, "grad_norm": 1.4758939743041992, "learning_rate": 8.402713567839198e-06, "loss": 0.0043, "step": 16400 }, { "epoch": 18.922811059907833, "grad_norm": 0.4875180721282959, "learning_rate": 8.400201005025125e-06, "loss": 0.0048, "step": 16425 }, { "epoch": 18.951612903225808, "grad_norm": 0.4063858389854431, "learning_rate": 8.397688442211056e-06, "loss": 0.0039, "step": 16450 }, { "epoch": 18.98041474654378, "grad_norm": 0.7259709239006042, "learning_rate": 8.395175879396986e-06, "loss": 0.0044, "step": 16475 }, { "epoch": 19.00921658986175, "grad_norm": 3.675870418548584, "learning_rate": 8.392663316582915e-06, "loss": 0.0048, "step": 16500 }, { "epoch": 19.038018433179722, "grad_norm": 0.4127393364906311, "learning_rate": 8.390150753768846e-06, "loss": 0.0032, "step": 16525 }, { "epoch": 19.066820276497698, "grad_norm": 3.1078572273254395, "learning_rate": 8.387638190954774e-06, "loss": 0.004, "step": 16550 }, { "epoch": 19.09562211981567, "grad_norm": 0.8320157527923584, "learning_rate": 8.385125628140705e-06, "loss": 0.0037, "step": 16575 }, { "epoch": 19.12442396313364, "grad_norm": 4.440084934234619, "learning_rate": 8.382613065326634e-06, "loss": 0.0033, "step": 16600 }, { "epoch": 19.153225806451612, "grad_norm": 1.0799018144607544, "learning_rate": 8.380100502512563e-06, "loss": 0.0035, "step": 16625 }, { "epoch": 19.182027649769584, "grad_norm": 1.2514456510543823, "learning_rate": 8.377587939698493e-06, "loss": 0.003, "step": 16650 }, { "epoch": 19.21082949308756, "grad_norm": 0.4411851167678833, "learning_rate": 8.375075376884424e-06, "loss": 0.0032, "step": 16675 }, { "epoch": 19.23963133640553, "grad_norm": 0.5363137722015381, "learning_rate": 8.372562814070353e-06, "loss": 0.0031, "step": 16700 }, { "epoch": 19.268433179723502, "grad_norm": 0.29095005989074707, "learning_rate": 8.370050251256282e-06, "loss": 0.0043, "step": 16725 }, { "epoch": 19.297235023041473, "grad_norm": 0.8113002181053162, "learning_rate": 8.367537688442212e-06, "loss": 0.0044, "step": 16750 }, { "epoch": 19.32603686635945, "grad_norm": 0.37377282977104187, "learning_rate": 8.365025125628141e-06, "loss": 0.0037, "step": 16775 }, { "epoch": 19.35483870967742, "grad_norm": 0.4084361791610718, "learning_rate": 8.362512562814072e-06, "loss": 0.0037, "step": 16800 }, { "epoch": 19.38364055299539, "grad_norm": 1.0157698392868042, "learning_rate": 8.36e-06, "loss": 0.0029, "step": 16825 }, { "epoch": 19.412442396313363, "grad_norm": 2.223855972290039, "learning_rate": 8.35748743718593e-06, "loss": 0.0034, "step": 16850 }, { "epoch": 19.441244239631338, "grad_norm": 0.4188908040523529, "learning_rate": 8.35497487437186e-06, "loss": 0.0033, "step": 16875 }, { "epoch": 19.47004608294931, "grad_norm": 0.34078195691108704, "learning_rate": 8.35246231155779e-06, "loss": 0.0031, "step": 16900 }, { "epoch": 19.49884792626728, "grad_norm": 1.0634256601333618, "learning_rate": 8.34994974874372e-06, "loss": 0.0031, "step": 16925 }, { "epoch": 19.527649769585253, "grad_norm": 0.5438229441642761, "learning_rate": 8.34743718592965e-06, "loss": 0.0029, "step": 16950 }, { "epoch": 19.556451612903224, "grad_norm": 0.3398396670818329, "learning_rate": 8.344924623115579e-06, "loss": 0.0036, "step": 16975 }, { "epoch": 19.5852534562212, "grad_norm": 0.46514657139778137, "learning_rate": 8.342412060301508e-06, "loss": 0.003, "step": 17000 }, { "epoch": 19.5852534562212, "eval_loss": 0.9077998399734497, "eval_runtime": 85.0625, "eval_samples_per_second": 37.208, "eval_steps_per_second": 4.655, "eval_wer": 38.45107309275195, "step": 17000 }, { "epoch": 19.61405529953917, "grad_norm": 0.25680673122406006, "learning_rate": 8.339899497487438e-06, "loss": 0.0032, "step": 17025 }, { "epoch": 19.642857142857142, "grad_norm": 1.5100712776184082, "learning_rate": 8.337386934673367e-06, "loss": 0.0036, "step": 17050 }, { "epoch": 19.671658986175114, "grad_norm": 0.41589298844337463, "learning_rate": 8.334874371859298e-06, "loss": 0.0037, "step": 17075 }, { "epoch": 19.70046082949309, "grad_norm": 2.06826114654541, "learning_rate": 8.332361809045226e-06, "loss": 0.0028, "step": 17100 }, { "epoch": 19.72926267281106, "grad_norm": 0.6016713380813599, "learning_rate": 8.329849246231157e-06, "loss": 0.003, "step": 17125 }, { "epoch": 19.758064516129032, "grad_norm": 0.563005805015564, "learning_rate": 8.327336683417086e-06, "loss": 0.0032, "step": 17150 }, { "epoch": 19.786866359447004, "grad_norm": 0.34128084778785706, "learning_rate": 8.324824120603015e-06, "loss": 0.0039, "step": 17175 }, { "epoch": 19.81566820276498, "grad_norm": 1.1265920400619507, "learning_rate": 8.322311557788946e-06, "loss": 0.0033, "step": 17200 }, { "epoch": 19.84447004608295, "grad_norm": 8.35198974609375, "learning_rate": 8.319798994974876e-06, "loss": 0.0033, "step": 17225 }, { "epoch": 19.87327188940092, "grad_norm": 2.044416666030884, "learning_rate": 8.317286432160805e-06, "loss": 0.0035, "step": 17250 }, { "epoch": 19.902073732718893, "grad_norm": 0.6673789024353027, "learning_rate": 8.314773869346734e-06, "loss": 0.0036, "step": 17275 }, { "epoch": 19.930875576036865, "grad_norm": 6.148689270019531, "learning_rate": 8.312261306532663e-06, "loss": 0.0045, "step": 17300 }, { "epoch": 19.95967741935484, "grad_norm": 2.0340828895568848, "learning_rate": 8.309748743718595e-06, "loss": 0.0036, "step": 17325 }, { "epoch": 19.98847926267281, "grad_norm": 0.6998608112335205, "learning_rate": 8.307236180904524e-06, "loss": 0.0043, "step": 17350 }, { "epoch": 20.017281105990783, "grad_norm": 0.6519997119903564, "learning_rate": 8.304723618090453e-06, "loss": 0.0041, "step": 17375 }, { "epoch": 20.046082949308754, "grad_norm": 7.046141147613525, "learning_rate": 8.302211055276382e-06, "loss": 0.0038, "step": 17400 }, { "epoch": 20.07488479262673, "grad_norm": 0.6556172966957092, "learning_rate": 8.299698492462312e-06, "loss": 0.0031, "step": 17425 }, { "epoch": 20.1036866359447, "grad_norm": 0.4308678209781647, "learning_rate": 8.297185929648241e-06, "loss": 0.0039, "step": 17450 }, { "epoch": 20.132488479262673, "grad_norm": 3.6136600971221924, "learning_rate": 8.294673366834172e-06, "loss": 0.0029, "step": 17475 }, { "epoch": 20.161290322580644, "grad_norm": 2.0070505142211914, "learning_rate": 8.292160804020101e-06, "loss": 0.0027, "step": 17500 }, { "epoch": 20.19009216589862, "grad_norm": 1.687195062637329, "learning_rate": 8.28964824120603e-06, "loss": 0.0034, "step": 17525 }, { "epoch": 20.21889400921659, "grad_norm": 0.32570701837539673, "learning_rate": 8.287135678391962e-06, "loss": 0.0029, "step": 17550 }, { "epoch": 20.247695852534562, "grad_norm": 0.2792959213256836, "learning_rate": 8.28462311557789e-06, "loss": 0.0036, "step": 17575 }, { "epoch": 20.276497695852534, "grad_norm": 0.30617085099220276, "learning_rate": 8.28211055276382e-06, "loss": 0.0029, "step": 17600 }, { "epoch": 20.305299539170505, "grad_norm": 0.4590087831020355, "learning_rate": 8.27959798994975e-06, "loss": 0.0024, "step": 17625 }, { "epoch": 20.33410138248848, "grad_norm": 0.4867629110813141, "learning_rate": 8.277085427135679e-06, "loss": 0.0023, "step": 17650 }, { "epoch": 20.362903225806452, "grad_norm": 0.5633640885353088, "learning_rate": 8.274572864321608e-06, "loss": 0.0028, "step": 17675 }, { "epoch": 20.391705069124423, "grad_norm": 2.707547187805176, "learning_rate": 8.272060301507538e-06, "loss": 0.0035, "step": 17700 }, { "epoch": 20.420506912442395, "grad_norm": 3.288668155670166, "learning_rate": 8.269547738693467e-06, "loss": 0.0028, "step": 17725 }, { "epoch": 20.44930875576037, "grad_norm": 2.6495327949523926, "learning_rate": 8.267035175879398e-06, "loss": 0.0035, "step": 17750 }, { "epoch": 20.47811059907834, "grad_norm": 3.7775795459747314, "learning_rate": 8.264522613065327e-06, "loss": 0.0038, "step": 17775 }, { "epoch": 20.506912442396313, "grad_norm": 1.4332729578018188, "learning_rate": 8.262010050251257e-06, "loss": 0.0039, "step": 17800 }, { "epoch": 20.535714285714285, "grad_norm": 0.5143281817436218, "learning_rate": 8.259497487437188e-06, "loss": 0.003, "step": 17825 }, { "epoch": 20.56451612903226, "grad_norm": 0.39990219473838806, "learning_rate": 8.256984924623115e-06, "loss": 0.0029, "step": 17850 }, { "epoch": 20.59331797235023, "grad_norm": 1.9264297485351562, "learning_rate": 8.254472361809046e-06, "loss": 0.003, "step": 17875 }, { "epoch": 20.622119815668203, "grad_norm": 1.0413120985031128, "learning_rate": 8.251959798994976e-06, "loss": 0.0034, "step": 17900 }, { "epoch": 20.650921658986174, "grad_norm": 0.3129425048828125, "learning_rate": 8.249547738693467e-06, "loss": 0.0034, "step": 17925 }, { "epoch": 20.679723502304146, "grad_norm": 0.7232864499092102, "learning_rate": 8.247035175879398e-06, "loss": 0.0034, "step": 17950 }, { "epoch": 20.70852534562212, "grad_norm": 0.7560633420944214, "learning_rate": 8.244522613065328e-06, "loss": 0.003, "step": 17975 }, { "epoch": 20.737327188940093, "grad_norm": 1.3522878885269165, "learning_rate": 8.242010050251257e-06, "loss": 0.0043, "step": 18000 }, { "epoch": 20.737327188940093, "eval_loss": 0.9137334227561951, "eval_runtime": 110.9579, "eval_samples_per_second": 28.524, "eval_steps_per_second": 3.569, "eval_wer": 38.508894255297434, "step": 18000 }, { "epoch": 20.766129032258064, "grad_norm": 0.4229632616043091, "learning_rate": 8.239497487437186e-06, "loss": 0.0034, "step": 18025 }, { "epoch": 20.794930875576036, "grad_norm": 0.26873457431793213, "learning_rate": 8.236984924623116e-06, "loss": 0.0034, "step": 18050 }, { "epoch": 20.82373271889401, "grad_norm": 0.2838130593299866, "learning_rate": 8.234572864321609e-06, "loss": 0.0034, "step": 18075 }, { "epoch": 20.852534562211982, "grad_norm": 4.030309200286865, "learning_rate": 8.232060301507538e-06, "loss": 0.0032, "step": 18100 }, { "epoch": 20.881336405529954, "grad_norm": 0.7039595246315002, "learning_rate": 8.229547738693467e-06, "loss": 0.0034, "step": 18125 }, { "epoch": 20.910138248847925, "grad_norm": 10.146681785583496, "learning_rate": 8.227035175879397e-06, "loss": 0.0038, "step": 18150 }, { "epoch": 20.9389400921659, "grad_norm": 2.8911712169647217, "learning_rate": 8.224522613065328e-06, "loss": 0.0034, "step": 18175 }, { "epoch": 20.967741935483872, "grad_norm": 0.3136543929576874, "learning_rate": 8.222010050251257e-06, "loss": 0.0036, "step": 18200 }, { "epoch": 20.996543778801843, "grad_norm": 0.39129388332366943, "learning_rate": 8.219497487437186e-06, "loss": 0.003, "step": 18225 }, { "epoch": 21.025345622119815, "grad_norm": 0.6136582493782043, "learning_rate": 8.216984924623116e-06, "loss": 0.0026, "step": 18250 }, { "epoch": 21.054147465437786, "grad_norm": 0.18073834478855133, "learning_rate": 8.214472361809047e-06, "loss": 0.0021, "step": 18275 }, { "epoch": 21.08294930875576, "grad_norm": 0.2269873321056366, "learning_rate": 8.211959798994974e-06, "loss": 0.0028, "step": 18300 }, { "epoch": 21.111751152073733, "grad_norm": 4.462664604187012, "learning_rate": 8.209447236180905e-06, "loss": 0.0025, "step": 18325 }, { "epoch": 21.140552995391705, "grad_norm": 0.13837726414203644, "learning_rate": 8.206934673366835e-06, "loss": 0.0023, "step": 18350 }, { "epoch": 21.169354838709676, "grad_norm": 0.24502778053283691, "learning_rate": 8.204422110552764e-06, "loss": 0.0024, "step": 18375 }, { "epoch": 21.19815668202765, "grad_norm": 1.8128526210784912, "learning_rate": 8.201909547738695e-06, "loss": 0.0028, "step": 18400 }, { "epoch": 21.226958525345623, "grad_norm": 9.304438591003418, "learning_rate": 8.199396984924623e-06, "loss": 0.0033, "step": 18425 }, { "epoch": 21.255760368663594, "grad_norm": 1.2997369766235352, "learning_rate": 8.196884422110554e-06, "loss": 0.0023, "step": 18450 }, { "epoch": 21.284562211981566, "grad_norm": 0.8964159488677979, "learning_rate": 8.194371859296483e-06, "loss": 0.0034, "step": 18475 }, { "epoch": 21.313364055299537, "grad_norm": 0.6467965841293335, "learning_rate": 8.191859296482412e-06, "loss": 0.0032, "step": 18500 }, { "epoch": 21.342165898617512, "grad_norm": 0.3044304847717285, "learning_rate": 8.189346733668342e-06, "loss": 0.003, "step": 18525 }, { "epoch": 21.370967741935484, "grad_norm": 0.21780455112457275, "learning_rate": 8.186834170854273e-06, "loss": 0.0028, "step": 18550 }, { "epoch": 21.399769585253456, "grad_norm": 0.26606276631355286, "learning_rate": 8.184321608040202e-06, "loss": 0.0025, "step": 18575 }, { "epoch": 21.428571428571427, "grad_norm": 0.7577885389328003, "learning_rate": 8.181809045226131e-06, "loss": 0.0027, "step": 18600 }, { "epoch": 21.457373271889402, "grad_norm": 1.1948332786560059, "learning_rate": 8.17929648241206e-06, "loss": 0.0037, "step": 18625 }, { "epoch": 21.486175115207374, "grad_norm": 0.5646118521690369, "learning_rate": 8.17678391959799e-06, "loss": 0.0032, "step": 18650 }, { "epoch": 21.514976958525345, "grad_norm": 0.31275391578674316, "learning_rate": 8.174271356783921e-06, "loss": 0.0028, "step": 18675 }, { "epoch": 21.543778801843317, "grad_norm": 0.5724737644195557, "learning_rate": 8.171758793969849e-06, "loss": 0.0027, "step": 18700 }, { "epoch": 21.572580645161292, "grad_norm": 0.8115091323852539, "learning_rate": 8.16924623115578e-06, "loss": 0.0023, "step": 18725 }, { "epoch": 21.601382488479263, "grad_norm": 1.2583937644958496, "learning_rate": 8.166733668341709e-06, "loss": 0.0027, "step": 18750 }, { "epoch": 21.630184331797235, "grad_norm": 1.3207777738571167, "learning_rate": 8.164221105527638e-06, "loss": 0.0027, "step": 18775 }, { "epoch": 21.658986175115206, "grad_norm": 0.29879096150398254, "learning_rate": 8.16170854271357e-06, "loss": 0.0025, "step": 18800 }, { "epoch": 21.68778801843318, "grad_norm": 8.853851318359375, "learning_rate": 8.159195979899499e-06, "loss": 0.0035, "step": 18825 }, { "epoch": 21.716589861751153, "grad_norm": 0.4238080382347107, "learning_rate": 8.156683417085428e-06, "loss": 0.0029, "step": 18850 }, { "epoch": 21.745391705069125, "grad_norm": 0.48964083194732666, "learning_rate": 8.154170854271357e-06, "loss": 0.0026, "step": 18875 }, { "epoch": 21.774193548387096, "grad_norm": 1.622617483139038, "learning_rate": 8.151658291457287e-06, "loss": 0.003, "step": 18900 }, { "epoch": 21.802995391705068, "grad_norm": 0.3238430619239807, "learning_rate": 8.149145728643216e-06, "loss": 0.003, "step": 18925 }, { "epoch": 21.831797235023043, "grad_norm": 1.5126019716262817, "learning_rate": 8.146633165829147e-06, "loss": 0.0031, "step": 18950 }, { "epoch": 21.860599078341014, "grad_norm": 0.40093284845352173, "learning_rate": 8.144120603015076e-06, "loss": 0.0025, "step": 18975 }, { "epoch": 21.889400921658986, "grad_norm": 2.164680004119873, "learning_rate": 8.141608040201006e-06, "loss": 0.0031, "step": 19000 }, { "epoch": 21.889400921658986, "eval_loss": 0.9279758930206299, "eval_runtime": 91.2807, "eval_samples_per_second": 34.673, "eval_steps_per_second": 4.338, "eval_wer": 38.65854902894459, "step": 19000 }, { "epoch": 21.918202764976957, "grad_norm": 0.5282910466194153, "learning_rate": 8.139095477386935e-06, "loss": 0.0032, "step": 19025 }, { "epoch": 21.947004608294932, "grad_norm": 0.6237956285476685, "learning_rate": 8.136582914572864e-06, "loss": 0.0032, "step": 19050 }, { "epoch": 21.975806451612904, "grad_norm": 4.942371368408203, "learning_rate": 8.134070351758795e-06, "loss": 0.0031, "step": 19075 }, { "epoch": 22.004608294930875, "grad_norm": 0.20130577683448792, "learning_rate": 8.131557788944725e-06, "loss": 0.0034, "step": 19100 }, { "epoch": 22.033410138248847, "grad_norm": 5.130532264709473, "learning_rate": 8.129045226130654e-06, "loss": 0.003, "step": 19125 }, { "epoch": 22.06221198156682, "grad_norm": 0.5252763032913208, "learning_rate": 8.126532663316583e-06, "loss": 0.0033, "step": 19150 }, { "epoch": 22.091013824884794, "grad_norm": 0.4144936800003052, "learning_rate": 8.124020100502513e-06, "loss": 0.0032, "step": 19175 }, { "epoch": 22.119815668202765, "grad_norm": 2.8102610111236572, "learning_rate": 8.121507537688444e-06, "loss": 0.0027, "step": 19200 }, { "epoch": 22.148617511520737, "grad_norm": 0.9101895093917847, "learning_rate": 8.118994974874373e-06, "loss": 0.0022, "step": 19225 }, { "epoch": 22.177419354838708, "grad_norm": 0.44814616441726685, "learning_rate": 8.116482412060302e-06, "loss": 0.0021, "step": 19250 }, { "epoch": 22.206221198156683, "grad_norm": 0.5755036473274231, "learning_rate": 8.113969849246232e-06, "loss": 0.0022, "step": 19275 }, { "epoch": 22.235023041474655, "grad_norm": 0.22766675055027008, "learning_rate": 8.111457286432161e-06, "loss": 0.0025, "step": 19300 }, { "epoch": 22.263824884792626, "grad_norm": 3.4409162998199463, "learning_rate": 8.10894472361809e-06, "loss": 0.0023, "step": 19325 }, { "epoch": 22.292626728110598, "grad_norm": 0.18357917666435242, "learning_rate": 8.106432160804021e-06, "loss": 0.002, "step": 19350 }, { "epoch": 22.321428571428573, "grad_norm": 0.22470392286777496, "learning_rate": 8.10391959798995e-06, "loss": 0.0022, "step": 19375 }, { "epoch": 22.350230414746544, "grad_norm": 1.733270287513733, "learning_rate": 8.10140703517588e-06, "loss": 0.0025, "step": 19400 }, { "epoch": 22.379032258064516, "grad_norm": 1.949141025543213, "learning_rate": 8.098894472361811e-06, "loss": 0.0022, "step": 19425 }, { "epoch": 22.407834101382488, "grad_norm": 0.312378466129303, "learning_rate": 8.096381909547739e-06, "loss": 0.0028, "step": 19450 }, { "epoch": 22.436635944700463, "grad_norm": 0.351078063249588, "learning_rate": 8.09386934673367e-06, "loss": 0.0027, "step": 19475 }, { "epoch": 22.465437788018434, "grad_norm": 0.6218938827514648, "learning_rate": 8.091356783919599e-06, "loss": 0.0029, "step": 19500 }, { "epoch": 22.494239631336406, "grad_norm": 0.29574936628341675, "learning_rate": 8.088844221105528e-06, "loss": 0.0023, "step": 19525 }, { "epoch": 22.523041474654377, "grad_norm": 2.1559417247772217, "learning_rate": 8.086331658291458e-06, "loss": 0.0024, "step": 19550 }, { "epoch": 22.55184331797235, "grad_norm": 1.988834261894226, "learning_rate": 8.083819095477387e-06, "loss": 0.0031, "step": 19575 }, { "epoch": 22.580645161290324, "grad_norm": 0.2758013606071472, "learning_rate": 8.081306532663318e-06, "loss": 0.003, "step": 19600 }, { "epoch": 22.609447004608295, "grad_norm": 0.4193066954612732, "learning_rate": 8.078793969849247e-06, "loss": 0.0025, "step": 19625 }, { "epoch": 22.638248847926267, "grad_norm": 3.132497787475586, "learning_rate": 8.076281407035177e-06, "loss": 0.0035, "step": 19650 }, { "epoch": 22.66705069124424, "grad_norm": 0.6784379482269287, "learning_rate": 8.073768844221106e-06, "loss": 0.0042, "step": 19675 }, { "epoch": 22.695852534562214, "grad_norm": 0.23190373182296753, "learning_rate": 8.071256281407037e-06, "loss": 0.0026, "step": 19700 }, { "epoch": 22.724654377880185, "grad_norm": 1.096924901008606, "learning_rate": 8.068743718592964e-06, "loss": 0.0023, "step": 19725 }, { "epoch": 22.753456221198157, "grad_norm": 9.167013168334961, "learning_rate": 8.066231155778895e-06, "loss": 0.0024, "step": 19750 }, { "epoch": 22.782258064516128, "grad_norm": 0.43118682503700256, "learning_rate": 8.063718592964825e-06, "loss": 0.0024, "step": 19775 }, { "epoch": 22.8110599078341, "grad_norm": 0.665557324886322, "learning_rate": 8.061206030150754e-06, "loss": 0.0026, "step": 19800 }, { "epoch": 22.839861751152075, "grad_norm": 0.7342368364334106, "learning_rate": 8.058693467336685e-06, "loss": 0.0028, "step": 19825 }, { "epoch": 22.868663594470046, "grad_norm": 3.46016001701355, "learning_rate": 8.056180904522613e-06, "loss": 0.0027, "step": 19850 }, { "epoch": 22.897465437788018, "grad_norm": 1.667484164237976, "learning_rate": 8.053668341708544e-06, "loss": 0.0037, "step": 19875 }, { "epoch": 22.92626728110599, "grad_norm": 4.423374176025391, "learning_rate": 8.051155778894473e-06, "loss": 0.0036, "step": 19900 }, { "epoch": 22.955069124423964, "grad_norm": 0.29686522483825684, "learning_rate": 8.048643216080402e-06, "loss": 0.0023, "step": 19925 }, { "epoch": 22.983870967741936, "grad_norm": 0.996593713760376, "learning_rate": 8.046130653266332e-06, "loss": 0.0025, "step": 19950 }, { "epoch": 23.012672811059907, "grad_norm": 0.4377709925174713, "learning_rate": 8.043618090452263e-06, "loss": 0.0023, "step": 19975 }, { "epoch": 23.04147465437788, "grad_norm": 0.44471868872642517, "learning_rate": 8.04110552763819e-06, "loss": 0.0027, "step": 20000 }, { "epoch": 23.04147465437788, "eval_loss": 0.9242417216300964, "eval_runtime": 101.8378, "eval_samples_per_second": 31.079, "eval_steps_per_second": 3.889, "eval_wer": 38.444270603040714, "step": 20000 }, { "epoch": 23.070276497695854, "grad_norm": 0.4535547196865082, "learning_rate": 8.038592964824121e-06, "loss": 0.0021, "step": 20025 }, { "epoch": 23.099078341013826, "grad_norm": 0.4852837026119232, "learning_rate": 8.03608040201005e-06, "loss": 0.0018, "step": 20050 }, { "epoch": 23.127880184331797, "grad_norm": 3.9571735858917236, "learning_rate": 8.03356783919598e-06, "loss": 0.0021, "step": 20075 }, { "epoch": 23.15668202764977, "grad_norm": 0.3189612627029419, "learning_rate": 8.031055276381911e-06, "loss": 0.0016, "step": 20100 }, { "epoch": 23.18548387096774, "grad_norm": 0.3088395297527313, "learning_rate": 8.028542713567839e-06, "loss": 0.0018, "step": 20125 }, { "epoch": 23.214285714285715, "grad_norm": 1.131415843963623, "learning_rate": 8.02603015075377e-06, "loss": 0.0028, "step": 20150 }, { "epoch": 23.243087557603687, "grad_norm": 0.3082955777645111, "learning_rate": 8.023517587939699e-06, "loss": 0.0028, "step": 20175 }, { "epoch": 23.27188940092166, "grad_norm": 0.21362662315368652, "learning_rate": 8.021005025125628e-06, "loss": 0.0021, "step": 20200 }, { "epoch": 23.30069124423963, "grad_norm": 0.2493545114994049, "learning_rate": 8.018592964824122e-06, "loss": 0.002, "step": 20225 }, { "epoch": 23.329493087557605, "grad_norm": 1.6416292190551758, "learning_rate": 8.016080402010051e-06, "loss": 0.0021, "step": 20250 }, { "epoch": 23.358294930875577, "grad_norm": 4.457999229431152, "learning_rate": 8.01356783919598e-06, "loss": 0.003, "step": 20275 }, { "epoch": 23.387096774193548, "grad_norm": 1.3196316957473755, "learning_rate": 8.011055276381911e-06, "loss": 0.0029, "step": 20300 }, { "epoch": 23.41589861751152, "grad_norm": 3.916231632232666, "learning_rate": 8.008542713567839e-06, "loss": 0.0032, "step": 20325 }, { "epoch": 23.444700460829495, "grad_norm": 0.4370511770248413, "learning_rate": 8.00603015075377e-06, "loss": 0.0027, "step": 20350 }, { "epoch": 23.473502304147466, "grad_norm": 0.39745643734931946, "learning_rate": 8.0035175879397e-06, "loss": 0.0029, "step": 20375 }, { "epoch": 23.502304147465438, "grad_norm": 0.2724677324295044, "learning_rate": 8.001005025125629e-06, "loss": 0.0022, "step": 20400 }, { "epoch": 23.53110599078341, "grad_norm": 0.16311800479888916, "learning_rate": 7.99849246231156e-06, "loss": 0.002, "step": 20425 }, { "epoch": 23.55990783410138, "grad_norm": 0.39565083384513855, "learning_rate": 7.995979899497487e-06, "loss": 0.0023, "step": 20450 }, { "epoch": 23.588709677419356, "grad_norm": 0.5105719566345215, "learning_rate": 7.993467336683418e-06, "loss": 0.0024, "step": 20475 }, { "epoch": 23.617511520737327, "grad_norm": 0.37558573484420776, "learning_rate": 7.990954773869348e-06, "loss": 0.002, "step": 20500 }, { "epoch": 23.6463133640553, "grad_norm": 0.6388649940490723, "learning_rate": 7.988442211055277e-06, "loss": 0.0022, "step": 20525 }, { "epoch": 23.67511520737327, "grad_norm": 0.48203355073928833, "learning_rate": 7.985929648241206e-06, "loss": 0.0023, "step": 20550 }, { "epoch": 23.703917050691246, "grad_norm": 0.14328773319721222, "learning_rate": 7.983417085427137e-06, "loss": 0.0021, "step": 20575 }, { "epoch": 23.732718894009217, "grad_norm": 0.36487457156181335, "learning_rate": 7.980904522613065e-06, "loss": 0.0024, "step": 20600 }, { "epoch": 23.76152073732719, "grad_norm": 0.3798920214176178, "learning_rate": 7.978391959798996e-06, "loss": 0.0024, "step": 20625 }, { "epoch": 23.79032258064516, "grad_norm": 0.6046701669692993, "learning_rate": 7.975879396984925e-06, "loss": 0.0027, "step": 20650 }, { "epoch": 23.819124423963135, "grad_norm": 6.382814407348633, "learning_rate": 7.973366834170855e-06, "loss": 0.0026, "step": 20675 }, { "epoch": 23.847926267281107, "grad_norm": 0.3240143656730652, "learning_rate": 7.970854271356786e-06, "loss": 0.004, "step": 20700 }, { "epoch": 23.87672811059908, "grad_norm": 0.4781886339187622, "learning_rate": 7.968341708542713e-06, "loss": 0.0035, "step": 20725 }, { "epoch": 23.90552995391705, "grad_norm": 1.16640043258667, "learning_rate": 7.965829145728644e-06, "loss": 0.0041, "step": 20750 }, { "epoch": 23.93433179723502, "grad_norm": 1.0096765756607056, "learning_rate": 7.963316582914574e-06, "loss": 0.0029, "step": 20775 }, { "epoch": 23.963133640552996, "grad_norm": 0.33333709836006165, "learning_rate": 7.960804020100503e-06, "loss": 0.0043, "step": 20800 }, { "epoch": 23.991935483870968, "grad_norm": 0.4203219711780548, "learning_rate": 7.958291457286432e-06, "loss": 0.0027, "step": 20825 }, { "epoch": 24.02073732718894, "grad_norm": 0.16500654816627502, "learning_rate": 7.955778894472363e-06, "loss": 0.0026, "step": 20850 }, { "epoch": 24.04953917050691, "grad_norm": 1.8736776113510132, "learning_rate": 7.953266331658293e-06, "loss": 0.0019, "step": 20875 }, { "epoch": 24.078341013824886, "grad_norm": 0.5201366543769836, "learning_rate": 7.950753768844222e-06, "loss": 0.0018, "step": 20900 }, { "epoch": 24.107142857142858, "grad_norm": 2.796762228012085, "learning_rate": 7.948241206030151e-06, "loss": 0.0021, "step": 20925 }, { "epoch": 24.13594470046083, "grad_norm": 0.37139081954956055, "learning_rate": 7.94572864321608e-06, "loss": 0.0028, "step": 20950 }, { "epoch": 24.1647465437788, "grad_norm": 1.9158135652542114, "learning_rate": 7.943216080402011e-06, "loss": 0.0021, "step": 20975 }, { "epoch": 24.193548387096776, "grad_norm": 0.4297555983066559, "learning_rate": 7.940703517587939e-06, "loss": 0.0018, "step": 21000 }, { "epoch": 24.193548387096776, "eval_loss": 0.9288270473480225, "eval_runtime": 95.3124, "eval_samples_per_second": 33.207, "eval_steps_per_second": 4.155, "eval_wer": 38.23339342199245, "step": 21000 }, { "epoch": 24.222350230414747, "grad_norm": 0.26113659143447876, "learning_rate": 7.93819095477387e-06, "loss": 0.0017, "step": 21025 }, { "epoch": 24.25115207373272, "grad_norm": 0.43748632073402405, "learning_rate": 7.9356783919598e-06, "loss": 0.0022, "step": 21050 }, { "epoch": 24.27995391705069, "grad_norm": 0.1944546401500702, "learning_rate": 7.933165829145729e-06, "loss": 0.0017, "step": 21075 }, { "epoch": 24.308755760368662, "grad_norm": 1.0659925937652588, "learning_rate": 7.93065326633166e-06, "loss": 0.0019, "step": 21100 }, { "epoch": 24.337557603686637, "grad_norm": 0.12227648496627808, "learning_rate": 7.928140703517589e-06, "loss": 0.0022, "step": 21125 }, { "epoch": 24.36635944700461, "grad_norm": 0.17430192232131958, "learning_rate": 7.925628140703518e-06, "loss": 0.0016, "step": 21150 }, { "epoch": 24.39516129032258, "grad_norm": 0.32086867094039917, "learning_rate": 7.923115577889448e-06, "loss": 0.0019, "step": 21175 }, { "epoch": 24.42396313364055, "grad_norm": 0.38323232531547546, "learning_rate": 7.920603015075377e-06, "loss": 0.0022, "step": 21200 }, { "epoch": 24.452764976958527, "grad_norm": 0.13918966054916382, "learning_rate": 7.918090452261306e-06, "loss": 0.0016, "step": 21225 }, { "epoch": 24.481566820276498, "grad_norm": 0.18693329393863678, "learning_rate": 7.915577889447237e-06, "loss": 0.0019, "step": 21250 }, { "epoch": 24.51036866359447, "grad_norm": 0.2091025859117508, "learning_rate": 7.913065326633167e-06, "loss": 0.0021, "step": 21275 }, { "epoch": 24.53917050691244, "grad_norm": 0.37567439675331116, "learning_rate": 7.910552763819096e-06, "loss": 0.0019, "step": 21300 }, { "epoch": 24.567972350230416, "grad_norm": 2.131410837173462, "learning_rate": 7.908040201005025e-06, "loss": 0.0021, "step": 21325 }, { "epoch": 24.596774193548388, "grad_norm": 1.1627036333084106, "learning_rate": 7.905527638190955e-06, "loss": 0.0021, "step": 21350 }, { "epoch": 24.62557603686636, "grad_norm": 0.1401105523109436, "learning_rate": 7.903015075376886e-06, "loss": 0.0021, "step": 21375 }, { "epoch": 24.65437788018433, "grad_norm": 1.9016780853271484, "learning_rate": 7.900502512562815e-06, "loss": 0.0021, "step": 21400 }, { "epoch": 24.683179723502302, "grad_norm": 0.2895117700099945, "learning_rate": 7.897989949748744e-06, "loss": 0.0025, "step": 21425 }, { "epoch": 24.711981566820278, "grad_norm": 1.3011502027511597, "learning_rate": 7.895477386934674e-06, "loss": 0.0024, "step": 21450 }, { "epoch": 24.74078341013825, "grad_norm": 0.46795564889907837, "learning_rate": 7.892964824120603e-06, "loss": 0.0027, "step": 21475 }, { "epoch": 24.76958525345622, "grad_norm": 0.22540783882141113, "learning_rate": 7.890452261306534e-06, "loss": 0.0026, "step": 21500 }, { "epoch": 24.798387096774192, "grad_norm": 0.1956023871898651, "learning_rate": 7.887939698492463e-06, "loss": 0.0019, "step": 21525 }, { "epoch": 24.827188940092167, "grad_norm": 0.15479375422000885, "learning_rate": 7.885427135678393e-06, "loss": 0.0024, "step": 21550 }, { "epoch": 24.85599078341014, "grad_norm": 0.905436098575592, "learning_rate": 7.882914572864322e-06, "loss": 0.0031, "step": 21575 }, { "epoch": 24.88479262672811, "grad_norm": 2.6657960414886475, "learning_rate": 7.880402010050251e-06, "loss": 0.0027, "step": 21600 }, { "epoch": 24.913594470046082, "grad_norm": 0.32765939831733704, "learning_rate": 7.87788944723618e-06, "loss": 0.0031, "step": 21625 }, { "epoch": 24.942396313364057, "grad_norm": 4.5679731369018555, "learning_rate": 7.875376884422112e-06, "loss": 0.0026, "step": 21650 }, { "epoch": 24.97119815668203, "grad_norm": 0.22829271852970123, "learning_rate": 7.872864321608041e-06, "loss": 0.0021, "step": 21675 }, { "epoch": 25.0, "grad_norm": 0.9648045897483826, "learning_rate": 7.87035175879397e-06, "loss": 0.0018, "step": 21700 }, { "epoch": 25.02880184331797, "grad_norm": 0.17003478109836578, "learning_rate": 7.867839195979901e-06, "loss": 0.0014, "step": 21725 }, { "epoch": 25.057603686635943, "grad_norm": 0.1516946703195572, "learning_rate": 7.865326633165829e-06, "loss": 0.0015, "step": 21750 }, { "epoch": 25.086405529953918, "grad_norm": 0.09871332347393036, "learning_rate": 7.86281407035176e-06, "loss": 0.0012, "step": 21775 }, { "epoch": 25.11520737327189, "grad_norm": 0.09212125092744827, "learning_rate": 7.86030150753769e-06, "loss": 0.0011, "step": 21800 }, { "epoch": 25.14400921658986, "grad_norm": 0.7161795496940613, "learning_rate": 7.857788944723619e-06, "loss": 0.0017, "step": 21825 }, { "epoch": 25.172811059907833, "grad_norm": 0.3720991611480713, "learning_rate": 7.855276381909548e-06, "loss": 0.0018, "step": 21850 }, { "epoch": 25.201612903225808, "grad_norm": 0.3234277665615082, "learning_rate": 7.852763819095477e-06, "loss": 0.0013, "step": 21875 }, { "epoch": 25.23041474654378, "grad_norm": 0.15219411253929138, "learning_rate": 7.850251256281408e-06, "loss": 0.0013, "step": 21900 }, { "epoch": 25.25921658986175, "grad_norm": 0.11881902813911438, "learning_rate": 7.847738693467338e-06, "loss": 0.0014, "step": 21925 }, { "epoch": 25.288018433179722, "grad_norm": 0.140849769115448, "learning_rate": 7.845226130653267e-06, "loss": 0.0015, "step": 21950 }, { "epoch": 25.316820276497698, "grad_norm": 0.15785449743270874, "learning_rate": 7.842713567839196e-06, "loss": 0.0016, "step": 21975 }, { "epoch": 25.34562211981567, "grad_norm": 0.4326983094215393, "learning_rate": 7.840201005025127e-06, "loss": 0.0014, "step": 22000 }, { "epoch": 25.34562211981567, "eval_loss": 0.9465144872665405, "eval_runtime": 97.0981, "eval_samples_per_second": 32.596, "eval_steps_per_second": 4.078, "eval_wer": 38.37624570592837, "step": 22000 }, { "epoch": 25.37442396313364, "grad_norm": 0.15047943592071533, "learning_rate": 7.837688442211055e-06, "loss": 0.0012, "step": 22025 }, { "epoch": 25.403225806451612, "grad_norm": 0.1131584644317627, "learning_rate": 7.835175879396986e-06, "loss": 0.0016, "step": 22050 }, { "epoch": 25.432027649769584, "grad_norm": 0.5928472876548767, "learning_rate": 7.832663316582915e-06, "loss": 0.0017, "step": 22075 }, { "epoch": 25.46082949308756, "grad_norm": 0.7007647156715393, "learning_rate": 7.830150753768845e-06, "loss": 0.0021, "step": 22100 }, { "epoch": 25.48963133640553, "grad_norm": 0.15398329496383667, "learning_rate": 7.827638190954776e-06, "loss": 0.0016, "step": 22125 }, { "epoch": 25.518433179723502, "grad_norm": 0.2075181007385254, "learning_rate": 7.825125628140703e-06, "loss": 0.0018, "step": 22150 }, { "epoch": 25.547235023041473, "grad_norm": 0.5136115550994873, "learning_rate": 7.822613065326634e-06, "loss": 0.0017, "step": 22175 }, { "epoch": 25.57603686635945, "grad_norm": 0.38125309348106384, "learning_rate": 7.820100502512564e-06, "loss": 0.0024, "step": 22200 }, { "epoch": 25.60483870967742, "grad_norm": 0.21021759510040283, "learning_rate": 7.817587939698493e-06, "loss": 0.003, "step": 22225 }, { "epoch": 25.63364055299539, "grad_norm": 2.2954089641571045, "learning_rate": 7.815075376884422e-06, "loss": 0.0022, "step": 22250 }, { "epoch": 25.662442396313363, "grad_norm": 0.32835134863853455, "learning_rate": 7.812562814070353e-06, "loss": 0.0025, "step": 22275 }, { "epoch": 25.691244239631338, "grad_norm": 0.557365357875824, "learning_rate": 7.810050251256283e-06, "loss": 0.0019, "step": 22300 }, { "epoch": 25.72004608294931, "grad_norm": 0.21897892653942108, "learning_rate": 7.807537688442212e-06, "loss": 0.0021, "step": 22325 }, { "epoch": 25.74884792626728, "grad_norm": 0.809830367565155, "learning_rate": 7.805025125628141e-06, "loss": 0.0016, "step": 22350 }, { "epoch": 25.777649769585253, "grad_norm": 2.19582200050354, "learning_rate": 7.80251256281407e-06, "loss": 0.0018, "step": 22375 }, { "epoch": 25.806451612903224, "grad_norm": 0.2633974552154541, "learning_rate": 7.800000000000002e-06, "loss": 0.0018, "step": 22400 }, { "epoch": 25.8352534562212, "grad_norm": 1.1487597227096558, "learning_rate": 7.79748743718593e-06, "loss": 0.0029, "step": 22425 }, { "epoch": 25.86405529953917, "grad_norm": 0.15872685611248016, "learning_rate": 7.79497487437186e-06, "loss": 0.0017, "step": 22450 }, { "epoch": 25.892857142857142, "grad_norm": 0.15796197950839996, "learning_rate": 7.79246231155779e-06, "loss": 0.0016, "step": 22475 }, { "epoch": 25.921658986175114, "grad_norm": 4.004288196563721, "learning_rate": 7.789949748743719e-06, "loss": 0.0018, "step": 22500 }, { "epoch": 25.95046082949309, "grad_norm": 0.2509973645210266, "learning_rate": 7.78743718592965e-06, "loss": 0.0031, "step": 22525 }, { "epoch": 25.97926267281106, "grad_norm": 0.4413185119628906, "learning_rate": 7.78492462311558e-06, "loss": 0.002, "step": 22550 }, { "epoch": 26.008064516129032, "grad_norm": 0.22845827043056488, "learning_rate": 7.782412060301509e-06, "loss": 0.0021, "step": 22575 }, { "epoch": 26.036866359447004, "grad_norm": 0.21273565292358398, "learning_rate": 7.779899497487438e-06, "loss": 0.0013, "step": 22600 }, { "epoch": 26.06566820276498, "grad_norm": 2.1821398735046387, "learning_rate": 7.777386934673367e-06, "loss": 0.0013, "step": 22625 }, { "epoch": 26.09447004608295, "grad_norm": 0.17955072224140167, "learning_rate": 7.774874371859296e-06, "loss": 0.0013, "step": 22650 }, { "epoch": 26.12327188940092, "grad_norm": 0.2776668965816498, "learning_rate": 7.772361809045227e-06, "loss": 0.0015, "step": 22675 }, { "epoch": 26.152073732718893, "grad_norm": 0.11128777265548706, "learning_rate": 7.769849246231155e-06, "loss": 0.0012, "step": 22700 }, { "epoch": 26.180875576036865, "grad_norm": 0.08699720352888107, "learning_rate": 7.767336683417086e-06, "loss": 0.0012, "step": 22725 }, { "epoch": 26.20967741935484, "grad_norm": 0.6124765276908875, "learning_rate": 7.764824120603015e-06, "loss": 0.0013, "step": 22750 }, { "epoch": 26.23847926267281, "grad_norm": 0.40712404251098633, "learning_rate": 7.762311557788945e-06, "loss": 0.0013, "step": 22775 }, { "epoch": 26.267281105990783, "grad_norm": 1.3135100603103638, "learning_rate": 7.759798994974876e-06, "loss": 0.0017, "step": 22800 }, { "epoch": 26.296082949308754, "grad_norm": 0.9609215259552002, "learning_rate": 7.757286432160805e-06, "loss": 0.0021, "step": 22825 }, { "epoch": 26.32488479262673, "grad_norm": 1.147431492805481, "learning_rate": 7.754773869346734e-06, "loss": 0.0018, "step": 22850 }, { "epoch": 26.3536866359447, "grad_norm": 0.258087158203125, "learning_rate": 7.752261306532664e-06, "loss": 0.0019, "step": 22875 }, { "epoch": 26.382488479262673, "grad_norm": 0.6731234192848206, "learning_rate": 7.749748743718593e-06, "loss": 0.0024, "step": 22900 }, { "epoch": 26.411290322580644, "grad_norm": 0.5642282962799072, "learning_rate": 7.747236180904524e-06, "loss": 0.0021, "step": 22925 }, { "epoch": 26.44009216589862, "grad_norm": 0.9383044838905334, "learning_rate": 7.744723618090453e-06, "loss": 0.0031, "step": 22950 }, { "epoch": 26.46889400921659, "grad_norm": 0.15090426802635193, "learning_rate": 7.742211055276383e-06, "loss": 0.0015, "step": 22975 }, { "epoch": 26.497695852534562, "grad_norm": 0.7468026876449585, "learning_rate": 7.739698492462312e-06, "loss": 0.0017, "step": 23000 }, { "epoch": 26.497695852534562, "eval_loss": 0.9553683996200562, "eval_runtime": 92.247, "eval_samples_per_second": 34.31, "eval_steps_per_second": 4.293, "eval_wer": 37.978300057821166, "step": 23000 }, { "epoch": 26.526497695852534, "grad_norm": 0.48173436522483826, "learning_rate": 7.737185929648241e-06, "loss": 0.0022, "step": 23025 }, { "epoch": 26.555299539170505, "grad_norm": 0.501175582408905, "learning_rate": 7.73467336683417e-06, "loss": 0.0042, "step": 23050 }, { "epoch": 26.58410138248848, "grad_norm": 0.6478195786476135, "learning_rate": 7.732160804020102e-06, "loss": 0.0029, "step": 23075 }, { "epoch": 26.612903225806452, "grad_norm": 0.2927117943763733, "learning_rate": 7.729648241206031e-06, "loss": 0.0022, "step": 23100 }, { "epoch": 26.641705069124423, "grad_norm": 1.6736509799957275, "learning_rate": 7.72713567839196e-06, "loss": 0.0021, "step": 23125 }, { "epoch": 26.670506912442395, "grad_norm": 0.4920669198036194, "learning_rate": 7.724623115577891e-06, "loss": 0.0016, "step": 23150 }, { "epoch": 26.69930875576037, "grad_norm": 0.6978172063827515, "learning_rate": 7.722110552763819e-06, "loss": 0.0022, "step": 23175 }, { "epoch": 26.72811059907834, "grad_norm": 1.3670384883880615, "learning_rate": 7.71959798994975e-06, "loss": 0.0026, "step": 23200 }, { "epoch": 26.756912442396313, "grad_norm": 1.023821473121643, "learning_rate": 7.71708542713568e-06, "loss": 0.0034, "step": 23225 }, { "epoch": 26.785714285714285, "grad_norm": 3.015160083770752, "learning_rate": 7.714673366834171e-06, "loss": 0.0028, "step": 23250 }, { "epoch": 26.81451612903226, "grad_norm": 0.45171189308166504, "learning_rate": 7.712160804020102e-06, "loss": 0.0026, "step": 23275 }, { "epoch": 26.84331797235023, "grad_norm": 0.48415061831474304, "learning_rate": 7.70964824120603e-06, "loss": 0.0028, "step": 23300 }, { "epoch": 26.872119815668203, "grad_norm": 0.6840101480484009, "learning_rate": 7.70713567839196e-06, "loss": 0.0022, "step": 23325 }, { "epoch": 26.900921658986174, "grad_norm": 2.51725697517395, "learning_rate": 7.70462311557789e-06, "loss": 0.0031, "step": 23350 }, { "epoch": 26.929723502304146, "grad_norm": 3.0907065868377686, "learning_rate": 7.70211055276382e-06, "loss": 0.0034, "step": 23375 }, { "epoch": 26.95852534562212, "grad_norm": 2.3439996242523193, "learning_rate": 7.69959798994975e-06, "loss": 0.003, "step": 23400 }, { "epoch": 26.987327188940093, "grad_norm": 0.3960202634334564, "learning_rate": 7.69708542713568e-06, "loss": 0.0029, "step": 23425 }, { "epoch": 27.016129032258064, "grad_norm": 0.2512092888355255, "learning_rate": 7.694572864321609e-06, "loss": 0.0032, "step": 23450 }, { "epoch": 27.044930875576036, "grad_norm": 2.5770974159240723, "learning_rate": 7.692060301507538e-06, "loss": 0.0021, "step": 23475 }, { "epoch": 27.07373271889401, "grad_norm": 2.609301805496216, "learning_rate": 7.689547738693468e-06, "loss": 0.0021, "step": 23500 }, { "epoch": 27.102534562211982, "grad_norm": 0.1264917254447937, "learning_rate": 7.687035175879397e-06, "loss": 0.0019, "step": 23525 }, { "epoch": 27.131336405529954, "grad_norm": 1.064979910850525, "learning_rate": 7.684522613065328e-06, "loss": 0.0017, "step": 23550 }, { "epoch": 27.160138248847925, "grad_norm": 0.20335279405117035, "learning_rate": 7.682010050251257e-06, "loss": 0.0019, "step": 23575 }, { "epoch": 27.1889400921659, "grad_norm": 0.7127830386161804, "learning_rate": 7.679497487437187e-06, "loss": 0.0019, "step": 23600 }, { "epoch": 27.217741935483872, "grad_norm": 0.1446090191602707, "learning_rate": 7.676984924623116e-06, "loss": 0.0018, "step": 23625 }, { "epoch": 27.246543778801843, "grad_norm": 0.9763817191123962, "learning_rate": 7.674472361809045e-06, "loss": 0.0025, "step": 23650 }, { "epoch": 27.275345622119815, "grad_norm": 0.2775149941444397, "learning_rate": 7.671959798994976e-06, "loss": 0.0018, "step": 23675 }, { "epoch": 27.304147465437786, "grad_norm": 0.6636967062950134, "learning_rate": 7.669447236180906e-06, "loss": 0.0019, "step": 23700 }, { "epoch": 27.33294930875576, "grad_norm": 0.24331210553646088, "learning_rate": 7.666934673366835e-06, "loss": 0.0015, "step": 23725 }, { "epoch": 27.361751152073733, "grad_norm": 0.6536253094673157, "learning_rate": 7.664422110552764e-06, "loss": 0.0016, "step": 23750 }, { "epoch": 27.390552995391705, "grad_norm": 0.1480594277381897, "learning_rate": 7.661909547738693e-06, "loss": 0.0021, "step": 23775 }, { "epoch": 27.419354838709676, "grad_norm": 0.4785195291042328, "learning_rate": 7.659396984924625e-06, "loss": 0.0023, "step": 23800 }, { "epoch": 27.44815668202765, "grad_norm": 0.1821582019329071, "learning_rate": 7.656884422110554e-06, "loss": 0.002, "step": 23825 }, { "epoch": 27.476958525345623, "grad_norm": 0.21500137448310852, "learning_rate": 7.654371859296483e-06, "loss": 0.0024, "step": 23850 }, { "epoch": 27.505760368663594, "grad_norm": 0.25329267978668213, "learning_rate": 7.651859296482412e-06, "loss": 0.0027, "step": 23875 }, { "epoch": 27.534562211981566, "grad_norm": 0.49291136860847473, "learning_rate": 7.649346733668342e-06, "loss": 0.0018, "step": 23900 }, { "epoch": 27.563364055299537, "grad_norm": 0.25580984354019165, "learning_rate": 7.646834170854271e-06, "loss": 0.0018, "step": 23925 }, { "epoch": 27.592165898617512, "grad_norm": 0.2736532390117645, "learning_rate": 7.644321608040202e-06, "loss": 0.0021, "step": 23950 }, { "epoch": 27.620967741935484, "grad_norm": 2.9828720092773438, "learning_rate": 7.641809045226131e-06, "loss": 0.0023, "step": 23975 }, { "epoch": 27.649769585253456, "grad_norm": 1.0487480163574219, "learning_rate": 7.63929648241206e-06, "loss": 0.0025, "step": 24000 }, { "epoch": 27.649769585253456, "eval_loss": 0.9685602188110352, "eval_runtime": 97.4965, "eval_samples_per_second": 32.463, "eval_steps_per_second": 4.062, "eval_wer": 38.284412094826706, "step": 24000 }, { "epoch": 27.678571428571427, "grad_norm": 0.7033334970474243, "learning_rate": 7.63678391959799e-06, "loss": 0.0019, "step": 24025 }, { "epoch": 27.707373271889402, "grad_norm": 1.673250436782837, "learning_rate": 7.63427135678392e-06, "loss": 0.0015, "step": 24050 }, { "epoch": 27.736175115207374, "grad_norm": 2.226487874984741, "learning_rate": 7.63175879396985e-06, "loss": 0.002, "step": 24075 }, { "epoch": 27.764976958525345, "grad_norm": 1.882995843887329, "learning_rate": 7.629246231155779e-06, "loss": 0.0024, "step": 24100 }, { "epoch": 27.793778801843317, "grad_norm": 1.454390287399292, "learning_rate": 7.626733668341709e-06, "loss": 0.0025, "step": 24125 }, { "epoch": 27.822580645161292, "grad_norm": 0.45975926518440247, "learning_rate": 7.624221105527638e-06, "loss": 0.0017, "step": 24150 }, { "epoch": 27.851382488479263, "grad_norm": 0.2526836693286896, "learning_rate": 7.621708542713569e-06, "loss": 0.0017, "step": 24175 }, { "epoch": 27.880184331797235, "grad_norm": 1.0226490497589111, "learning_rate": 7.619195979899499e-06, "loss": 0.0022, "step": 24200 }, { "epoch": 27.908986175115206, "grad_norm": 1.250891923904419, "learning_rate": 7.616683417085427e-06, "loss": 0.0023, "step": 24225 }, { "epoch": 27.93778801843318, "grad_norm": 1.1611874103546143, "learning_rate": 7.614170854271357e-06, "loss": 0.0028, "step": 24250 }, { "epoch": 27.966589861751153, "grad_norm": 0.560628354549408, "learning_rate": 7.611658291457287e-06, "loss": 0.0027, "step": 24275 }, { "epoch": 27.995391705069125, "grad_norm": 0.7735543847084045, "learning_rate": 7.609145728643217e-06, "loss": 0.0027, "step": 24300 }, { "epoch": 28.024193548387096, "grad_norm": 0.6917845010757446, "learning_rate": 7.606633165829146e-06, "loss": 0.0015, "step": 24325 }, { "epoch": 28.052995391705068, "grad_norm": 1.9410979747772217, "learning_rate": 7.604120603015076e-06, "loss": 0.0017, "step": 24350 }, { "epoch": 28.081797235023043, "grad_norm": 0.3064837157726288, "learning_rate": 7.6016080402010066e-06, "loss": 0.0015, "step": 24375 }, { "epoch": 28.110599078341014, "grad_norm": 0.338507741689682, "learning_rate": 7.599095477386935e-06, "loss": 0.0014, "step": 24400 }, { "epoch": 28.139400921658986, "grad_norm": 1.375050663948059, "learning_rate": 7.596582914572865e-06, "loss": 0.0018, "step": 24425 }, { "epoch": 28.168202764976957, "grad_norm": 0.33915019035339355, "learning_rate": 7.5940703517587945e-06, "loss": 0.0014, "step": 24450 }, { "epoch": 28.197004608294932, "grad_norm": 1.3390936851501465, "learning_rate": 7.591557788944725e-06, "loss": 0.0023, "step": 24475 }, { "epoch": 28.225806451612904, "grad_norm": 0.11323880404233932, "learning_rate": 7.589045226130653e-06, "loss": 0.0011, "step": 24500 }, { "epoch": 28.254608294930875, "grad_norm": 0.19411849975585938, "learning_rate": 7.586532663316583e-06, "loss": 0.0024, "step": 24525 }, { "epoch": 28.283410138248847, "grad_norm": 5.807502269744873, "learning_rate": 7.584020100502513e-06, "loss": 0.0017, "step": 24550 }, { "epoch": 28.31221198156682, "grad_norm": 0.21173454821109772, "learning_rate": 7.581507537688443e-06, "loss": 0.0014, "step": 24575 }, { "epoch": 28.341013824884794, "grad_norm": 0.4089387059211731, "learning_rate": 7.578994974874373e-06, "loss": 0.0018, "step": 24600 }, { "epoch": 28.369815668202765, "grad_norm": 0.24780121445655823, "learning_rate": 7.576482412060302e-06, "loss": 0.0019, "step": 24625 }, { "epoch": 28.398617511520737, "grad_norm": 0.14702747762203217, "learning_rate": 7.5739698492462325e-06, "loss": 0.0021, "step": 24650 }, { "epoch": 28.427419354838708, "grad_norm": 1.7569717168807983, "learning_rate": 7.571457286432161e-06, "loss": 0.0017, "step": 24675 }, { "epoch": 28.456221198156683, "grad_norm": 0.19148290157318115, "learning_rate": 7.568944723618091e-06, "loss": 0.0017, "step": 24700 }, { "epoch": 28.485023041474655, "grad_norm": 0.12815915048122406, "learning_rate": 7.5664321608040205e-06, "loss": 0.0018, "step": 24725 }, { "epoch": 28.513824884792626, "grad_norm": 0.17996148765087128, "learning_rate": 7.563919597989951e-06, "loss": 0.0016, "step": 24750 }, { "epoch": 28.542626728110598, "grad_norm": 0.4442344009876251, "learning_rate": 7.561407035175879e-06, "loss": 0.0031, "step": 24775 }, { "epoch": 28.571428571428573, "grad_norm": 0.463791161775589, "learning_rate": 7.558894472361809e-06, "loss": 0.0014, "step": 24800 }, { "epoch": 28.600230414746544, "grad_norm": 0.2696816325187683, "learning_rate": 7.5563819095477395e-06, "loss": 0.0015, "step": 24825 }, { "epoch": 28.629032258064516, "grad_norm": 0.7207157611846924, "learning_rate": 7.553869346733669e-06, "loss": 0.0013, "step": 24850 }, { "epoch": 28.657834101382488, "grad_norm": 2.0901260375976562, "learning_rate": 7.551356783919599e-06, "loss": 0.0016, "step": 24875 }, { "epoch": 28.686635944700463, "grad_norm": 0.22260797023773193, "learning_rate": 7.548844221105528e-06, "loss": 0.0016, "step": 24900 }, { "epoch": 28.715437788018434, "grad_norm": 2.0484445095062256, "learning_rate": 7.5463316582914585e-06, "loss": 0.0016, "step": 24925 }, { "epoch": 28.744239631336406, "grad_norm": 0.519088089466095, "learning_rate": 7.543819095477387e-06, "loss": 0.0015, "step": 24950 }, { "epoch": 28.773041474654377, "grad_norm": 1.1129772663116455, "learning_rate": 7.541306532663317e-06, "loss": 0.0017, "step": 24975 }, { "epoch": 28.80184331797235, "grad_norm": 3.140409469604492, "learning_rate": 7.538793969849247e-06, "loss": 0.002, "step": 25000 }, { "epoch": 28.80184331797235, "eval_loss": 0.9649926424026489, "eval_runtime": 103.2869, "eval_samples_per_second": 30.643, "eval_steps_per_second": 3.834, "eval_wer": 38.31162205367165, "step": 25000 }, { "epoch": 28.830645161290324, "grad_norm": 0.2126360386610031, "learning_rate": 7.536281407035177e-06, "loss": 0.0014, "step": 25025 }, { "epoch": 28.859447004608295, "grad_norm": 0.6415627002716064, "learning_rate": 7.533768844221107e-06, "loss": 0.0031, "step": 25050 }, { "epoch": 28.888248847926267, "grad_norm": 0.4984152913093567, "learning_rate": 7.531256281407035e-06, "loss": 0.0023, "step": 25075 }, { "epoch": 28.91705069124424, "grad_norm": 0.15690945088863373, "learning_rate": 7.528743718592965e-06, "loss": 0.0019, "step": 25100 }, { "epoch": 28.945852534562214, "grad_norm": 2.364226818084717, "learning_rate": 7.526231155778895e-06, "loss": 0.0014, "step": 25125 }, { "epoch": 28.974654377880185, "grad_norm": 0.848777174949646, "learning_rate": 7.523718592964825e-06, "loss": 0.0018, "step": 25150 }, { "epoch": 29.003456221198157, "grad_norm": 0.26915377378463745, "learning_rate": 7.521206030150754e-06, "loss": 0.0019, "step": 25175 }, { "epoch": 29.032258064516128, "grad_norm": 0.4228498041629791, "learning_rate": 7.518693467336684e-06, "loss": 0.0015, "step": 25200 }, { "epoch": 29.0610599078341, "grad_norm": 0.10511095076799393, "learning_rate": 7.5161809045226146e-06, "loss": 0.0016, "step": 25225 }, { "epoch": 29.089861751152075, "grad_norm": 0.7001926302909851, "learning_rate": 7.513668341708543e-06, "loss": 0.0015, "step": 25250 }, { "epoch": 29.118663594470046, "grad_norm": 2.306025266647339, "learning_rate": 7.511155778894473e-06, "loss": 0.0014, "step": 25275 }, { "epoch": 29.147465437788018, "grad_norm": 0.8390582203865051, "learning_rate": 7.5086432160804025e-06, "loss": 0.0014, "step": 25300 }, { "epoch": 29.17626728110599, "grad_norm": 0.08259406685829163, "learning_rate": 7.506130653266333e-06, "loss": 0.001, "step": 25325 }, { "epoch": 29.205069124423964, "grad_norm": 0.15182006359100342, "learning_rate": 7.503618090452261e-06, "loss": 0.0011, "step": 25350 }, { "epoch": 29.233870967741936, "grad_norm": 0.0862535759806633, "learning_rate": 7.501105527638191e-06, "loss": 0.0008, "step": 25375 }, { "epoch": 29.262672811059907, "grad_norm": 0.1828584522008896, "learning_rate": 7.498592964824121e-06, "loss": 0.001, "step": 25400 }, { "epoch": 29.29147465437788, "grad_norm": 0.07187247276306152, "learning_rate": 7.496080402010051e-06, "loss": 0.001, "step": 25425 }, { "epoch": 29.320276497695854, "grad_norm": 0.09676637500524521, "learning_rate": 7.493567839195981e-06, "loss": 0.0009, "step": 25450 }, { "epoch": 29.349078341013826, "grad_norm": 0.19479386508464813, "learning_rate": 7.49105527638191e-06, "loss": 0.0009, "step": 25475 }, { "epoch": 29.377880184331797, "grad_norm": 0.28262045979499817, "learning_rate": 7.4885427135678405e-06, "loss": 0.001, "step": 25500 }, { "epoch": 29.40668202764977, "grad_norm": 0.13244527578353882, "learning_rate": 7.486030150753769e-06, "loss": 0.0009, "step": 25525 }, { "epoch": 29.43548387096774, "grad_norm": 0.11293485760688782, "learning_rate": 7.483517587939699e-06, "loss": 0.0008, "step": 25550 }, { "epoch": 29.464285714285715, "grad_norm": 0.09940329939126968, "learning_rate": 7.4810050251256285e-06, "loss": 0.0008, "step": 25575 }, { "epoch": 29.493087557603687, "grad_norm": 0.1311177909374237, "learning_rate": 7.478492462311559e-06, "loss": 0.0013, "step": 25600 }, { "epoch": 29.52188940092166, "grad_norm": 0.38769614696502686, "learning_rate": 7.476080402010051e-06, "loss": 0.0012, "step": 25625 }, { "epoch": 29.55069124423963, "grad_norm": 0.15979507565498352, "learning_rate": 7.473567839195981e-06, "loss": 0.0015, "step": 25650 }, { "epoch": 29.579493087557605, "grad_norm": 0.12207220494747162, "learning_rate": 7.47105527638191e-06, "loss": 0.001, "step": 25675 }, { "epoch": 29.608294930875577, "grad_norm": 0.11001387983560562, "learning_rate": 7.46854271356784e-06, "loss": 0.0012, "step": 25700 }, { "epoch": 29.637096774193548, "grad_norm": 0.18420222401618958, "learning_rate": 7.466030150753769e-06, "loss": 0.001, "step": 25725 }, { "epoch": 29.66589861751152, "grad_norm": 0.21532350778579712, "learning_rate": 7.463517587939699e-06, "loss": 0.0016, "step": 25750 }, { "epoch": 29.694700460829495, "grad_norm": 0.1794181615114212, "learning_rate": 7.461005025125629e-06, "loss": 0.0018, "step": 25775 }, { "epoch": 29.723502304147466, "grad_norm": 0.11416684836149216, "learning_rate": 7.458492462311559e-06, "loss": 0.001, "step": 25800 }, { "epoch": 29.752304147465438, "grad_norm": 0.322307825088501, "learning_rate": 7.455979899497488e-06, "loss": 0.0012, "step": 25825 }, { "epoch": 29.78110599078341, "grad_norm": 0.8735306859016418, "learning_rate": 7.4534673366834175e-06, "loss": 0.0022, "step": 25850 }, { "epoch": 29.80990783410138, "grad_norm": 2.082735300064087, "learning_rate": 7.450954773869348e-06, "loss": 0.0019, "step": 25875 }, { "epoch": 29.838709677419356, "grad_norm": 0.3855551481246948, "learning_rate": 7.448442211055277e-06, "loss": 0.0017, "step": 25900 }, { "epoch": 29.867511520737327, "grad_norm": 0.49077606201171875, "learning_rate": 7.445929648241207e-06, "loss": 0.0016, "step": 25925 }, { "epoch": 29.8963133640553, "grad_norm": 0.1507556438446045, "learning_rate": 7.443417085427136e-06, "loss": 0.0016, "step": 25950 }, { "epoch": 29.92511520737327, "grad_norm": 2.349721670150757, "learning_rate": 7.440904522613066e-06, "loss": 0.003, "step": 25975 }, { "epoch": 29.953917050691246, "grad_norm": 1.583411455154419, "learning_rate": 7.438391959798995e-06, "loss": 0.0026, "step": 26000 }, { "epoch": 29.953917050691246, "eval_loss": 0.9768039584159851, "eval_runtime": 109.434, "eval_samples_per_second": 28.922, "eval_steps_per_second": 3.619, "eval_wer": 38.559912928131695, "step": 26000 } ], "logging_steps": 25, "max_steps": 100000, "num_input_tokens_seen": 0, "num_train_epochs": 116, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.04686334664704e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }