{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3099381673356165, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 9.7906, "step": 50 }, { "epoch": 0.01, "learning_rate": 4e-05, "loss": 7.9188, "step": 100 }, { "epoch": 0.01, "learning_rate": 6e-05, "loss": 6.8487, "step": 150 }, { "epoch": 0.01, "learning_rate": 8e-05, "loss": 6.0994, "step": 200 }, { "epoch": 0.02, "learning_rate": 0.0001, "loss": 5.7442, "step": 250 }, { "epoch": 0.02, "learning_rate": 0.00012, "loss": 5.5492, "step": 300 }, { "epoch": 0.02, "learning_rate": 0.00014000000000000001, "loss": 5.4021, "step": 350 }, { "epoch": 0.02, "learning_rate": 0.00016, "loss": 5.2486, "step": 400 }, { "epoch": 0.03, "learning_rate": 0.00017999999999999998, "loss": 5.128, "step": 450 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 5.0186, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.00022, "loss": 4.9228, "step": 550 }, { "epoch": 0.04, "learning_rate": 0.00024, "loss": 4.8538, "step": 600 }, { "epoch": 0.04, "learning_rate": 0.00026000000000000003, "loss": 4.7268, "step": 650 }, { "epoch": 0.04, "learning_rate": 0.00028000000000000003, "loss": 4.6724, "step": 700 }, { "epoch": 0.05, "learning_rate": 0.0003, "loss": 4.5713, "step": 750 }, { "epoch": 0.05, "learning_rate": 0.00032, "loss": 4.488, "step": 800 }, { "epoch": 0.05, "learning_rate": 0.00034, "loss": 4.4322, "step": 850 }, { "epoch": 0.06, "learning_rate": 0.00035999999999999997, "loss": 4.3735, "step": 900 }, { "epoch": 0.06, "learning_rate": 0.00038, "loss": 4.3239, "step": 950 }, { "epoch": 0.06, "learning_rate": 0.0004, "loss": 4.254, "step": 1000 }, { "epoch": 0.07, "learning_rate": 0.00042, "loss": 4.2093, "step": 1050 }, { "epoch": 0.07, "learning_rate": 0.00044, "loss": 4.1751, "step": 1100 }, { "epoch": 0.07, "learning_rate": 0.00046, "loss": 4.11, "step": 1150 }, { "epoch": 0.07, "learning_rate": 0.00048, "loss": 4.0559, "step": 1200 }, { "epoch": 0.08, "learning_rate": 0.0005, "loss": 4.0179, "step": 1250 }, { "epoch": 0.08, "learning_rate": 0.0005200000000000001, "loss": 3.9553, "step": 1300 }, { "epoch": 0.08, "learning_rate": 0.00054, "loss": 3.8982, "step": 1350 }, { "epoch": 0.09, "learning_rate": 0.0005600000000000001, "loss": 3.8803, "step": 1400 }, { "epoch": 0.09, "learning_rate": 0.00058, "loss": 3.865, "step": 1450 }, { "epoch": 0.09, "learning_rate": 0.0006, "loss": 3.8174, "step": 1500 }, { "epoch": 0.1, "learning_rate": 0.00062, "loss": 3.8031, "step": 1550 }, { "epoch": 0.1, "learning_rate": 0.00064, "loss": 3.7598, "step": 1600 }, { "epoch": 0.1, "learning_rate": 0.00066, "loss": 3.7252, "step": 1650 }, { "epoch": 0.11, "learning_rate": 0.00068, "loss": 3.715, "step": 1700 }, { "epoch": 0.11, "learning_rate": 0.0007, "loss": 3.6863, "step": 1750 }, { "epoch": 0.11, "learning_rate": 0.0007199999999999999, "loss": 3.6786, "step": 1800 }, { "epoch": 0.11, "learning_rate": 0.00074, "loss": 3.6639, "step": 1850 }, { "epoch": 0.12, "learning_rate": 0.00076, "loss": 3.647, "step": 1900 }, { "epoch": 0.12, "learning_rate": 0.0007800000000000001, "loss": 3.6227, "step": 1950 }, { "epoch": 0.12, "learning_rate": 0.0008, "loss": 3.6005, "step": 2000 }, { "epoch": 0.13, "learning_rate": 0.00082, "loss": 3.5824, "step": 2050 }, { "epoch": 0.13, "learning_rate": 0.00084, "loss": 3.5806, "step": 2100 }, { "epoch": 0.13, "learning_rate": 0.00086, "loss": 3.5605, "step": 2150 }, { "epoch": 0.14, "learning_rate": 0.00088, "loss": 3.5325, "step": 2200 }, { "epoch": 0.14, "learning_rate": 0.0009000000000000001, "loss": 3.5417, "step": 2250 }, { "epoch": 0.14, "learning_rate": 0.00092, "loss": 3.4979, "step": 2300 }, { "epoch": 0.15, "learning_rate": 0.00094, "loss": 3.5149, "step": 2350 }, { "epoch": 0.15, "learning_rate": 0.00096, "loss": 3.4793, "step": 2400 }, { "epoch": 0.15, "learning_rate": 0.00098, "loss": 3.5049, "step": 2450 }, { "epoch": 0.15, "learning_rate": 0.001, "loss": 3.4843, "step": 2500 }, { "epoch": 0.16, "learning_rate": 0.00098, "loss": 3.464, "step": 2550 }, { "epoch": 0.16, "learning_rate": 0.00096, "loss": 3.4651, "step": 2600 }, { "epoch": 0.16, "learning_rate": 0.00094, "loss": 3.4353, "step": 2650 }, { "epoch": 0.17, "learning_rate": 0.00092, "loss": 3.4139, "step": 2700 }, { "epoch": 0.17, "learning_rate": 0.0009000000000000001, "loss": 3.4013, "step": 2750 }, { "epoch": 0.17, "learning_rate": 0.00088, "loss": 3.3647, "step": 2800 }, { "epoch": 0.18, "learning_rate": 0.00086, "loss": 3.3461, "step": 2850 }, { "epoch": 0.18, "learning_rate": 0.00084, "loss": 3.3339, "step": 2900 }, { "epoch": 0.18, "learning_rate": 0.00082, "loss": 3.3228, "step": 2950 }, { "epoch": 0.19, "learning_rate": 0.0008, "loss": 3.3239, "step": 3000 }, { "epoch": 0.19, "learning_rate": 0.0007800000000000001, "loss": 3.2763, "step": 3050 }, { "epoch": 0.19, "learning_rate": 0.00076, "loss": 3.2408, "step": 3100 }, { "epoch": 0.2, "learning_rate": 0.00074, "loss": 3.231, "step": 3150 }, { "epoch": 0.2, "learning_rate": 0.0007199999999999999, "loss": 3.2317, "step": 3200 }, { "epoch": 0.2, "learning_rate": 0.0007, "loss": 3.2208, "step": 3250 }, { "epoch": 0.2, "learning_rate": 0.00068, "loss": 3.1952, "step": 3300 }, { "epoch": 0.21, "learning_rate": 0.00066, "loss": 3.1825, "step": 3350 }, { "epoch": 0.21, "learning_rate": 0.00064, "loss": 3.184, "step": 3400 }, { "epoch": 0.21, "learning_rate": 0.00062, "loss": 3.1452, "step": 3450 }, { "epoch": 0.22, "learning_rate": 0.0006, "loss": 3.1298, "step": 3500 }, { "epoch": 0.22, "learning_rate": 0.00058, "loss": 3.1244, "step": 3550 }, { "epoch": 0.22, "learning_rate": 0.0005600000000000001, "loss": 3.114, "step": 3600 }, { "epoch": 0.23, "learning_rate": 0.00054, "loss": 3.1101, "step": 3650 }, { "epoch": 0.23, "learning_rate": 0.0005200000000000001, "loss": 3.0911, "step": 3700 }, { "epoch": 0.23, "learning_rate": 0.0005, "loss": 3.0805, "step": 3750 }, { "epoch": 0.24, "learning_rate": 0.00048, "loss": 3.0669, "step": 3800 }, { "epoch": 0.24, "learning_rate": 0.00046, "loss": 3.0597, "step": 3850 }, { "epoch": 0.24, "learning_rate": 0.00044, "loss": 3.0388, "step": 3900 }, { "epoch": 0.24, "learning_rate": 0.00042, "loss": 3.0504, "step": 3950 }, { "epoch": 0.25, "learning_rate": 0.0004, "loss": 3.0222, "step": 4000 }, { "epoch": 0.25, "learning_rate": 0.00038, "loss": 3.0142, "step": 4050 }, { "epoch": 0.25, "learning_rate": 0.00035999999999999997, "loss": 3.0106, "step": 4100 }, { "epoch": 0.26, "learning_rate": 0.00034, "loss": 3.0054, "step": 4150 }, { "epoch": 0.26, "learning_rate": 0.00032, "loss": 2.9846, "step": 4200 }, { "epoch": 0.26, "learning_rate": 0.0003, "loss": 2.967, "step": 4250 }, { "epoch": 0.27, "learning_rate": 0.00028000000000000003, "loss": 2.9749, "step": 4300 }, { "epoch": 0.27, "learning_rate": 0.00026000000000000003, "loss": 2.9415, "step": 4350 }, { "epoch": 0.27, "learning_rate": 0.00024, "loss": 2.9486, "step": 4400 }, { "epoch": 0.28, "learning_rate": 0.00022, "loss": 2.9509, "step": 4450 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 2.9261, "step": 4500 }, { "epoch": 0.28, "learning_rate": 0.00017999999999999998, "loss": 2.9138, "step": 4550 }, { "epoch": 0.29, "learning_rate": 0.00016, "loss": 2.9356, "step": 4600 }, { "epoch": 0.29, "learning_rate": 0.00014000000000000001, "loss": 2.9036, "step": 4650 }, { "epoch": 0.29, "learning_rate": 0.00012, "loss": 2.8768, "step": 4700 }, { "epoch": 0.29, "learning_rate": 0.0001, "loss": 2.8875, "step": 4750 }, { "epoch": 0.3, "learning_rate": 8e-05, "loss": 2.8752, "step": 4800 }, { "epoch": 0.3, "learning_rate": 6e-05, "loss": 2.8816, "step": 4850 }, { "epoch": 0.3, "learning_rate": 4e-05, "loss": 2.8772, "step": 4900 }, { "epoch": 0.31, "learning_rate": 2e-05, "loss": 2.8658, "step": 4950 }, { "epoch": 0.31, "learning_rate": 0.0, "loss": 2.8755, "step": 5000 }, { "epoch": 0.31, "step": 5000, "total_flos": 7.55408830464e+16, "train_loss": 3.758148254394531, "train_runtime": 13724.031, "train_samples_per_second": 93.267, "train_steps_per_second": 0.364 } ], "logging_steps": 50, "max_steps": 5000, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 7.55408830464e+16, "trial_name": null, "trial_params": null }