{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.983957219251337, "eval_steps": 500, "global_step": 46, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0213903743315508, "grad_norm": 13.02454662322998, "learning_rate": 0.0, "loss": 1.6624, "step": 1 }, { "epoch": 0.0427807486631016, "grad_norm": 12.534032821655273, "learning_rate": 2.0000000000000003e-06, "loss": 1.6374, "step": 2 }, { "epoch": 0.06417112299465241, "grad_norm": 12.500082015991211, "learning_rate": 4.000000000000001e-06, "loss": 1.5855, "step": 3 }, { "epoch": 0.0855614973262032, "grad_norm": 19.927814483642578, "learning_rate": 6e-06, "loss": 1.585, "step": 4 }, { "epoch": 0.10695187165775401, "grad_norm": 8.535591125488281, "learning_rate": 8.000000000000001e-06, "loss": 1.4463, "step": 5 }, { "epoch": 0.12834224598930483, "grad_norm": 8.471524238586426, "learning_rate": 1e-05, "loss": 1.3428, "step": 6 }, { "epoch": 0.1497326203208556, "grad_norm": 6.918763637542725, "learning_rate": 9.756097560975611e-06, "loss": 1.3263, "step": 7 }, { "epoch": 0.1711229946524064, "grad_norm": 10.45449161529541, "learning_rate": 9.51219512195122e-06, "loss": 1.1986, "step": 8 }, { "epoch": 0.1925133689839572, "grad_norm": 7.750345706939697, "learning_rate": 9.268292682926831e-06, "loss": 1.1741, "step": 9 }, { "epoch": 0.21390374331550802, "grad_norm": 18.263578414916992, "learning_rate": 9.02439024390244e-06, "loss": 1.1137, "step": 10 }, { "epoch": 0.23529411764705882, "grad_norm": 10.370940208435059, "learning_rate": 8.78048780487805e-06, "loss": 1.0881, "step": 11 }, { "epoch": 0.25668449197860965, "grad_norm": 6.241279125213623, "learning_rate": 8.536585365853658e-06, "loss": 1.0457, "step": 12 }, { "epoch": 0.27807486631016043, "grad_norm": 6.566608905792236, "learning_rate": 8.292682926829268e-06, "loss": 1.0565, "step": 13 }, { "epoch": 0.2994652406417112, "grad_norm": 50.85325241088867, "learning_rate": 8.048780487804879e-06, "loss": 0.9655, "step": 14 }, { "epoch": 0.32085561497326204, "grad_norm": 4.764376163482666, "learning_rate": 7.804878048780489e-06, "loss": 1.07, "step": 15 }, { "epoch": 0.3422459893048128, "grad_norm": 8.540885925292969, "learning_rate": 7.560975609756098e-06, "loss": 0.9066, "step": 16 }, { "epoch": 0.36363636363636365, "grad_norm": 7.087356090545654, "learning_rate": 7.317073170731707e-06, "loss": 1.0001, "step": 17 }, { "epoch": 0.3850267379679144, "grad_norm": 7.610484600067139, "learning_rate": 7.0731707317073175e-06, "loss": 0.9206, "step": 18 }, { "epoch": 0.40641711229946526, "grad_norm": 19.34569549560547, "learning_rate": 6.829268292682928e-06, "loss": 0.9691, "step": 19 }, { "epoch": 0.42780748663101603, "grad_norm": 7.0099053382873535, "learning_rate": 6.585365853658538e-06, "loss": 0.9798, "step": 20 }, { "epoch": 0.44919786096256686, "grad_norm": 5.669744491577148, "learning_rate": 6.341463414634147e-06, "loss": 0.9754, "step": 21 }, { "epoch": 0.47058823529411764, "grad_norm": 4.6388630867004395, "learning_rate": 6.0975609756097564e-06, "loss": 0.8885, "step": 22 }, { "epoch": 0.4919786096256685, "grad_norm": 8.106114387512207, "learning_rate": 5.853658536585366e-06, "loss": 0.9252, "step": 23 }, { "epoch": 0.5133689839572193, "grad_norm": 7.336320400238037, "learning_rate": 5.609756097560977e-06, "loss": 0.916, "step": 24 }, { "epoch": 0.5347593582887701, "grad_norm": 18.173933029174805, "learning_rate": 5.365853658536586e-06, "loss": 0.8807, "step": 25 }, { "epoch": 0.5561497326203209, "grad_norm": 6.46876335144043, "learning_rate": 5.121951219512195e-06, "loss": 0.8674, "step": 26 }, { "epoch": 0.5775401069518716, "grad_norm": 7.97756290435791, "learning_rate": 4.8780487804878055e-06, "loss": 0.8969, "step": 27 }, { "epoch": 0.5989304812834224, "grad_norm": 18.289745330810547, "learning_rate": 4.634146341463416e-06, "loss": 0.9367, "step": 28 }, { "epoch": 0.6203208556149733, "grad_norm": 6.36326789855957, "learning_rate": 4.390243902439025e-06, "loss": 0.849, "step": 29 }, { "epoch": 0.6417112299465241, "grad_norm": 6.118152618408203, "learning_rate": 4.146341463414634e-06, "loss": 0.8319, "step": 30 }, { "epoch": 0.6631016042780749, "grad_norm": 9.238419532775879, "learning_rate": 3.902439024390244e-06, "loss": 0.9253, "step": 31 }, { "epoch": 0.6844919786096256, "grad_norm": 5.425105571746826, "learning_rate": 3.6585365853658537e-06, "loss": 0.8489, "step": 32 }, { "epoch": 0.7058823529411765, "grad_norm": 6.4847731590271, "learning_rate": 3.414634146341464e-06, "loss": 0.8567, "step": 33 }, { "epoch": 0.7272727272727273, "grad_norm": 6.540438175201416, "learning_rate": 3.1707317073170736e-06, "loss": 0.9211, "step": 34 }, { "epoch": 0.7486631016042781, "grad_norm": 10.181364059448242, "learning_rate": 2.926829268292683e-06, "loss": 0.8605, "step": 35 }, { "epoch": 0.7700534759358288, "grad_norm": 5.964487075805664, "learning_rate": 2.682926829268293e-06, "loss": 0.8089, "step": 36 }, { "epoch": 0.7914438502673797, "grad_norm": 9.849525451660156, "learning_rate": 2.4390243902439027e-06, "loss": 0.8418, "step": 37 }, { "epoch": 0.8128342245989305, "grad_norm": 6.233251571655273, "learning_rate": 2.1951219512195125e-06, "loss": 0.8011, "step": 38 }, { "epoch": 0.8342245989304813, "grad_norm": 4.578292369842529, "learning_rate": 1.951219512195122e-06, "loss": 0.9271, "step": 39 }, { "epoch": 0.8556149732620321, "grad_norm": 9.469111442565918, "learning_rate": 1.707317073170732e-06, "loss": 0.9077, "step": 40 }, { "epoch": 0.8770053475935828, "grad_norm": 25.019927978515625, "learning_rate": 1.4634146341463414e-06, "loss": 0.8391, "step": 41 }, { "epoch": 0.8983957219251337, "grad_norm": 4.891110897064209, "learning_rate": 1.2195121951219514e-06, "loss": 0.8804, "step": 42 }, { "epoch": 0.9197860962566845, "grad_norm": 5.689752578735352, "learning_rate": 9.75609756097561e-07, "loss": 0.8833, "step": 43 }, { "epoch": 0.9411764705882353, "grad_norm": 5.5397772789001465, "learning_rate": 7.317073170731707e-07, "loss": 0.8524, "step": 44 }, { "epoch": 0.9625668449197861, "grad_norm": 9.089788436889648, "learning_rate": 4.878048780487805e-07, "loss": 0.8907, "step": 45 }, { "epoch": 0.983957219251337, "grad_norm": 5.81181526184082, "learning_rate": 2.439024390243903e-07, "loss": 0.8489, "step": 46 }, { "epoch": 0.983957219251337, "step": 46, "total_flos": 199575490265088.0, "train_loss": 1.020340691442075, "train_runtime": 3230.2657, "train_samples_per_second": 0.463, "train_steps_per_second": 0.014 } ], "logging_steps": 1.0, "max_steps": 46, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 199575490265088.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }