{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.832, "eval_steps": 500, "global_step": 45, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.064, "grad_norm": 2.67690372467041, "learning_rate": 2.0000000000000003e-06, "loss": 0.8906, "step": 1 }, { "epoch": 0.128, "grad_norm": 2.5836398601531982, "learning_rate": 4.000000000000001e-06, "loss": 0.8844, "step": 2 }, { "epoch": 0.192, "grad_norm": 2.692220687866211, "learning_rate": 6e-06, "loss": 0.7934, "step": 3 }, { "epoch": 0.256, "grad_norm": 2.424602508544922, "learning_rate": 8.000000000000001e-06, "loss": 0.8946, "step": 4 }, { "epoch": 0.32, "grad_norm": 1.7447768449783325, "learning_rate": 1e-05, "loss": 0.7398, "step": 5 }, { "epoch": 0.384, "grad_norm": 0.985548734664917, "learning_rate": 9.984586668665641e-06, "loss": 0.7794, "step": 6 }, { "epoch": 0.448, "grad_norm": 0.9407026171684265, "learning_rate": 9.938441702975689e-06, "loss": 0.7591, "step": 7 }, { "epoch": 0.512, "grad_norm": 1.5231335163116455, "learning_rate": 9.861849601988384e-06, "loss": 0.7895, "step": 8 }, { "epoch": 0.576, "grad_norm": 1.7350199222564697, "learning_rate": 9.755282581475769e-06, "loss": 0.8721, "step": 9 }, { "epoch": 0.64, "grad_norm": 1.5788978338241577, "learning_rate": 9.619397662556434e-06, "loss": 0.8114, "step": 10 }, { "epoch": 0.704, "grad_norm": 1.2389469146728516, "learning_rate": 9.45503262094184e-06, "loss": 0.7008, "step": 11 }, { "epoch": 0.768, "grad_norm": 1.423549771308899, "learning_rate": 9.263200821770462e-06, "loss": 0.7585, "step": 12 }, { "epoch": 0.832, "grad_norm": 1.1815698146820068, "learning_rate": 9.045084971874738e-06, "loss": 0.6632, "step": 13 }, { "epoch": 0.896, "grad_norm": 1.1034165620803833, "learning_rate": 8.802029828000157e-06, "loss": 0.7416, "step": 14 }, { "epoch": 0.96, "grad_norm": 0.8475093245506287, "learning_rate": 8.535533905932739e-06, "loss": 0.7517, "step": 15 }, { "epoch": 1.0, "grad_norm": 0.8475093245506287, "learning_rate": 8.247240241650918e-06, "loss": 0.7802, "step": 16 }, { "epoch": 1.064, "grad_norm": 1.1045087575912476, "learning_rate": 7.938926261462366e-06, "loss": 0.6967, "step": 17 }, { "epoch": 1.1280000000000001, "grad_norm": 0.6255977153778076, "learning_rate": 7.612492823579744e-06, "loss": 0.6597, "step": 18 }, { "epoch": 1.192, "grad_norm": 0.7395212054252625, "learning_rate": 7.269952498697734e-06, "loss": 0.6748, "step": 19 }, { "epoch": 1.256, "grad_norm": 0.6993595957756042, "learning_rate": 6.913417161825449e-06, "loss": 0.7681, "step": 20 }, { "epoch": 1.32, "grad_norm": 0.7346011996269226, "learning_rate": 6.545084971874738e-06, "loss": 0.7262, "step": 21 }, { "epoch": 1.384, "grad_norm": 0.6753884553909302, "learning_rate": 6.1672268192795285e-06, "loss": 0.6249, "step": 22 }, { "epoch": 1.448, "grad_norm": 0.7035785913467407, "learning_rate": 5.782172325201155e-06, "loss": 0.6907, "step": 23 }, { "epoch": 1.512, "grad_norm": 0.6587271094322205, "learning_rate": 5.392295478639226e-06, "loss": 0.6754, "step": 24 }, { "epoch": 1.576, "grad_norm": 0.5424773097038269, "learning_rate": 5e-06, "loss": 0.6626, "step": 25 }, { "epoch": 1.6400000000000001, "grad_norm": 0.5312824845314026, "learning_rate": 4.6077045213607765e-06, "loss": 0.7369, "step": 26 }, { "epoch": 1.704, "grad_norm": 0.5092198848724365, "learning_rate": 4.217827674798845e-06, "loss": 0.766, "step": 27 }, { "epoch": 1.768, "grad_norm": 0.49842214584350586, "learning_rate": 3.832773180720475e-06, "loss": 0.7003, "step": 28 }, { "epoch": 1.8319999999999999, "grad_norm": 0.5175044536590576, "learning_rate": 3.4549150281252635e-06, "loss": 0.7393, "step": 29 }, { "epoch": 1.896, "grad_norm": 0.48302003741264343, "learning_rate": 3.0865828381745515e-06, "loss": 0.7018, "step": 30 }, { "epoch": 1.96, "grad_norm": 0.4566391110420227, "learning_rate": 2.7300475013022666e-06, "loss": 0.6598, "step": 31 }, { "epoch": 2.0, "grad_norm": 0.6593987941741943, "learning_rate": 2.387507176420256e-06, "loss": 0.651, "step": 32 }, { "epoch": 2.064, "grad_norm": 0.6568493843078613, "learning_rate": 2.061073738537635e-06, "loss": 0.7002, "step": 33 }, { "epoch": 2.128, "grad_norm": 0.4518449306488037, "learning_rate": 1.7527597583490825e-06, "loss": 0.7008, "step": 34 }, { "epoch": 2.192, "grad_norm": 0.4067769944667816, "learning_rate": 1.4644660940672628e-06, "loss": 0.6299, "step": 35 }, { "epoch": 2.2560000000000002, "grad_norm": 0.41410645842552185, "learning_rate": 1.1979701719998454e-06, "loss": 0.6832, "step": 36 }, { "epoch": 2.32, "grad_norm": 0.33175525069236755, "learning_rate": 9.549150281252633e-07, "loss": 0.5874, "step": 37 }, { "epoch": 2.384, "grad_norm": 0.4822118580341339, "learning_rate": 7.367991782295392e-07, "loss": 0.6533, "step": 38 }, { "epoch": 2.448, "grad_norm": 0.4443027079105377, "learning_rate": 5.449673790581611e-07, "loss": 0.7716, "step": 39 }, { "epoch": 2.512, "grad_norm": 0.38352447748184204, "learning_rate": 3.8060233744356634e-07, "loss": 0.7014, "step": 40 }, { "epoch": 2.576, "grad_norm": 0.39618387818336487, "learning_rate": 2.447174185242324e-07, "loss": 0.5842, "step": 41 }, { "epoch": 2.64, "grad_norm": 0.3571045398712158, "learning_rate": 1.3815039801161723e-07, "loss": 0.6552, "step": 42 }, { "epoch": 2.7039999999999997, "grad_norm": 0.44936269521713257, "learning_rate": 6.15582970243117e-08, "loss": 0.6133, "step": 43 }, { "epoch": 2.768, "grad_norm": 0.40641671419143677, "learning_rate": 1.541333133436018e-08, "loss": 0.6315, "step": 44 }, { "epoch": 2.832, "grad_norm": 0.4280812740325928, "learning_rate": 0.0, "loss": 0.6455, "step": 45 }, { "epoch": 2.832, "step": 45, "total_flos": 75420674818048.0, "train_loss": 0.7178263836436801, "train_runtime": 10468.2018, "train_samples_per_second": 0.287, "train_steps_per_second": 0.004 } ], "logging_steps": 1, "max_steps": 45, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 75420674818048.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }