{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 81, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18867924528301888, "grad_norm": NaN, "learning_rate": 4.998119881260576e-05, "loss": 4.699731063842774, "num_input_tokens_seen": 91920, "step": 5, "train_runtime": 98.4322, "train_tokens_per_second": 933.841 }, { "epoch": 0.37735849056603776, "grad_norm": NaN, "learning_rate": 4.998119881260576e-05, "loss": 10.709168243408204, "num_input_tokens_seen": 191440, "step": 10, "train_runtime": 200.6741, "train_tokens_per_second": 953.985 }, { "epoch": 0.5660377358490566, "grad_norm": NaN, "learning_rate": 4.998119881260576e-05, "loss": 11.792630004882813, "num_input_tokens_seen": 290528, "step": 15, "train_runtime": 303.0055, "train_tokens_per_second": 958.821 }, { "epoch": 0.7547169811320755, "grad_norm": NaN, "learning_rate": 4.99248235291948e-05, "loss": 13.518409729003906, "num_input_tokens_seen": 384736, "step": 20, "train_runtime": 402.2728, "train_tokens_per_second": 956.406 }, { "epoch": 0.9433962264150944, "grad_norm": NaN, "learning_rate": 4.99248235291948e-05, "loss": 20.178372192382813, "num_input_tokens_seen": 488112, "step": 25, "train_runtime": 508.669, "train_tokens_per_second": 959.587 }, { "epoch": 1.1132075471698113, "grad_norm": NaN, "learning_rate": 4.983095894354858e-05, "loss": 7.201190948486328, "num_input_tokens_seen": 584008, "step": 30, "train_runtime": 612.5705, "train_tokens_per_second": 953.373 }, { "epoch": 1.3018867924528301, "grad_norm": NaN, "learning_rate": 4.983095894354858e-05, "loss": 16.000038146972656, "num_input_tokens_seen": 687112, "step": 35, "train_runtime": 721.4375, "train_tokens_per_second": 952.421 }, { "epoch": 1.490566037735849, "grad_norm": NaN, "learning_rate": 4.983095894354858e-05, "loss": 9.682757568359374, "num_input_tokens_seen": 793608, "step": 40, "train_runtime": 833.895, "train_tokens_per_second": 951.688 }, { "epoch": 1.6792452830188678, "grad_norm": 0.0, "learning_rate": 4.983095894354858e-05, "loss": 6.101836395263672, "num_input_tokens_seen": 902728, "step": 45, "train_runtime": 949.9633, "train_tokens_per_second": 950.277 }, { "epoch": 1.8679245283018868, "grad_norm": NaN, "learning_rate": 4.969974623692023e-05, "loss": 10.295503997802735, "num_input_tokens_seen": 992696, "step": 50, "train_runtime": 1041.5791, "train_tokens_per_second": 953.068 }, { "epoch": 2.0377358490566038, "grad_norm": NaN, "learning_rate": 4.9326121764495596e-05, "loss": 4.089427185058594, "num_input_tokens_seen": 1091000, "step": 55, "train_runtime": 1146.5611, "train_tokens_per_second": 951.541 }, { "epoch": 2.2264150943396226, "grad_norm": NaN, "learning_rate": 4.9326121764495596e-05, "loss": 26.63399658203125, "num_input_tokens_seen": 1190312, "step": 60, "train_runtime": 1249.7851, "train_tokens_per_second": 952.413 }, { "epoch": 2.4150943396226414, "grad_norm": NaN, "learning_rate": 4.9084271965397014e-05, "loss": 5.875924301147461, "num_input_tokens_seen": 1291288, "step": 65, "train_runtime": 1358.4058, "train_tokens_per_second": 950.591 }, { "epoch": 2.6037735849056602, "grad_norm": NaN, "learning_rate": 4.9084271965397014e-05, "loss": 4.943584823608399, "num_input_tokens_seen": 1398264, "step": 70, "train_runtime": 1475.5209, "train_tokens_per_second": 947.641 }, { "epoch": 2.7924528301886795, "grad_norm": NaN, "learning_rate": 4.9084271965397014e-05, "loss": 11.34936752319336, "num_input_tokens_seen": 1502360, "step": 75, "train_runtime": 1584.2311, "train_tokens_per_second": 948.321 }, { "epoch": 2.981132075471698, "grad_norm": NaN, "learning_rate": 4.849231551964771e-05, "loss": 3.3955615997314452, "num_input_tokens_seen": 1586952, "step": 80, "train_runtime": 1668.9061, "train_tokens_per_second": 950.894 }, { "epoch": 3.0, "num_input_tokens_seen": 1595528, "step": 81, "total_flos": 3053620026209280.0, "train_loss": 10.295250705730767, "train_runtime": 1677.9647, "train_samples_per_second": 0.756, "train_steps_per_second": 0.048 } ], "logging_steps": 5, "max_steps": 81, "num_input_tokens_seen": 1595528, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3053620026209280.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }