{ "best_global_step": 50, "best_metric": 0.06661392003297806, "best_model_checkpoint": "./lifestyle-advisor-qwen-adapters/checkpoint-50", "epoch": 0.37037037037037035, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007407407407407408, "grad_norm": 0.8554655909538269, "learning_rate": 0.0, "loss": 2.2753, "step": 1 }, { "epoch": 0.014814814814814815, "grad_norm": 0.7770459651947021, "learning_rate": 2e-05, "loss": 2.2117, "step": 2 }, { "epoch": 0.022222222222222223, "grad_norm": 0.8054313063621521, "learning_rate": 4e-05, "loss": 2.2692, "step": 3 }, { "epoch": 0.02962962962962963, "grad_norm": 0.8971659541130066, "learning_rate": 6e-05, "loss": 2.2754, "step": 4 }, { "epoch": 0.037037037037037035, "grad_norm": 0.7792930006980896, "learning_rate": 8e-05, "loss": 2.1906, "step": 5 }, { "epoch": 0.044444444444444446, "grad_norm": 1.0174905061721802, "learning_rate": 0.0001, "loss": 2.0765, "step": 6 }, { "epoch": 0.05185185185185185, "grad_norm": 0.7820740938186646, "learning_rate": 0.00012, "loss": 1.9413, "step": 7 }, { "epoch": 0.05925925925925926, "grad_norm": 0.7353349328041077, "learning_rate": 0.00014, "loss": 1.8044, "step": 8 }, { "epoch": 0.06666666666666667, "grad_norm": 0.702397346496582, "learning_rate": 0.00016, "loss": 1.6369, "step": 9 }, { "epoch": 0.07407407407407407, "grad_norm": 0.6150615215301514, "learning_rate": 0.00018, "loss": 1.4686, "step": 10 }, { "epoch": 0.08148148148148149, "grad_norm": 0.5668545961380005, "learning_rate": 0.0002, "loss": 1.2712, "step": 11 }, { "epoch": 0.08888888888888889, "grad_norm": 0.6054787635803223, "learning_rate": 0.00019777777777777778, "loss": 1.1116, "step": 12 }, { "epoch": 0.0962962962962963, "grad_norm": 0.6496606469154358, "learning_rate": 0.00019555555555555556, "loss": 0.941, "step": 13 }, { "epoch": 0.1037037037037037, "grad_norm": 0.5952312350273132, "learning_rate": 0.00019333333333333333, "loss": 0.8313, "step": 14 }, { "epoch": 0.1111111111111111, "grad_norm": 0.539272665977478, "learning_rate": 0.00019111111111111114, "loss": 0.6799, "step": 15 }, { "epoch": 0.11851851851851852, "grad_norm": 0.6647361516952515, "learning_rate": 0.00018888888888888888, "loss": 0.5579, "step": 16 }, { "epoch": 0.1259259259259259, "grad_norm": 0.4626752734184265, "learning_rate": 0.0001866666666666667, "loss": 0.4265, "step": 17 }, { "epoch": 0.13333333333333333, "grad_norm": 0.4744594395160675, "learning_rate": 0.00018444444444444446, "loss": 0.3297, "step": 18 }, { "epoch": 0.14074074074074075, "grad_norm": 0.5266918540000916, "learning_rate": 0.00018222222222222224, "loss": 0.2863, "step": 19 }, { "epoch": 0.14814814814814814, "grad_norm": 0.5599148273468018, "learning_rate": 0.00018, "loss": 0.227, "step": 20 }, { "epoch": 0.15555555555555556, "grad_norm": 0.332426518201828, "learning_rate": 0.00017777777777777779, "loss": 0.167, "step": 21 }, { "epoch": 0.16296296296296298, "grad_norm": 0.3609375059604645, "learning_rate": 0.00017555555555555556, "loss": 0.1485, "step": 22 }, { "epoch": 0.17037037037037037, "grad_norm": 0.29623323678970337, "learning_rate": 0.00017333333333333334, "loss": 0.1369, "step": 23 }, { "epoch": 0.17777777777777778, "grad_norm": 0.2666948139667511, "learning_rate": 0.0001711111111111111, "loss": 0.1193, "step": 24 }, { "epoch": 0.18518518518518517, "grad_norm": 0.32019367814064026, "learning_rate": 0.00016888888888888889, "loss": 0.1249, "step": 25 }, { "epoch": 0.1925925925925926, "grad_norm": 0.28152069449424744, "learning_rate": 0.0001666666666666667, "loss": 0.1102, "step": 26 }, { "epoch": 0.2, "grad_norm": 0.28925931453704834, "learning_rate": 0.00016444444444444444, "loss": 0.0936, "step": 27 }, { "epoch": 0.2074074074074074, "grad_norm": 0.21765638887882233, "learning_rate": 0.00016222222222222224, "loss": 0.1021, "step": 28 }, { "epoch": 0.21481481481481482, "grad_norm": 0.22364364564418793, "learning_rate": 0.00016, "loss": 0.0953, "step": 29 }, { "epoch": 0.2222222222222222, "grad_norm": 0.2058999389410019, "learning_rate": 0.0001577777777777778, "loss": 0.0952, "step": 30 }, { "epoch": 0.22962962962962963, "grad_norm": 0.17060202360153198, "learning_rate": 0.00015555555555555556, "loss": 0.0866, "step": 31 }, { "epoch": 0.23703703703703705, "grad_norm": 0.17542508244514465, "learning_rate": 0.00015333333333333334, "loss": 0.0783, "step": 32 }, { "epoch": 0.24444444444444444, "grad_norm": 0.23341824114322662, "learning_rate": 0.0001511111111111111, "loss": 0.1003, "step": 33 }, { "epoch": 0.2518518518518518, "grad_norm": 0.17184007167816162, "learning_rate": 0.0001488888888888889, "loss": 0.0774, "step": 34 }, { "epoch": 0.25925925925925924, "grad_norm": 0.18990236520767212, "learning_rate": 0.00014666666666666666, "loss": 0.0852, "step": 35 }, { "epoch": 0.26666666666666666, "grad_norm": 0.17710138857364655, "learning_rate": 0.00014444444444444444, "loss": 0.0798, "step": 36 }, { "epoch": 0.2740740740740741, "grad_norm": 0.1709614247083664, "learning_rate": 0.00014222222222222224, "loss": 0.0787, "step": 37 }, { "epoch": 0.2814814814814815, "grad_norm": 0.2045363336801529, "learning_rate": 0.00014, "loss": 0.0647, "step": 38 }, { "epoch": 0.28888888888888886, "grad_norm": 0.17839980125427246, "learning_rate": 0.0001377777777777778, "loss": 0.0647, "step": 39 }, { "epoch": 0.2962962962962963, "grad_norm": 0.1639847755432129, "learning_rate": 0.00013555555555555556, "loss": 0.0633, "step": 40 }, { "epoch": 0.3037037037037037, "grad_norm": 0.142787903547287, "learning_rate": 0.00013333333333333334, "loss": 0.0669, "step": 41 }, { "epoch": 0.3111111111111111, "grad_norm": 0.1273476481437683, "learning_rate": 0.00013111111111111111, "loss": 0.0669, "step": 42 }, { "epoch": 0.31851851851851853, "grad_norm": 0.12263656407594681, "learning_rate": 0.00012888888888888892, "loss": 0.0555, "step": 43 }, { "epoch": 0.32592592592592595, "grad_norm": 0.1499451845884323, "learning_rate": 0.00012666666666666666, "loss": 0.0664, "step": 44 }, { "epoch": 0.3333333333333333, "grad_norm": 0.11736824363470078, "learning_rate": 0.00012444444444444444, "loss": 0.0646, "step": 45 }, { "epoch": 0.34074074074074073, "grad_norm": 0.12536290287971497, "learning_rate": 0.00012222222222222224, "loss": 0.0681, "step": 46 }, { "epoch": 0.34814814814814815, "grad_norm": 0.12962676584720612, "learning_rate": 0.00012, "loss": 0.0746, "step": 47 }, { "epoch": 0.35555555555555557, "grad_norm": 0.07444795221090317, "learning_rate": 0.00011777777777777779, "loss": 0.0598, "step": 48 }, { "epoch": 0.362962962962963, "grad_norm": 0.3645591139793396, "learning_rate": 0.00011555555555555555, "loss": 0.0771, "step": 49 }, { "epoch": 0.37037037037037035, "grad_norm": 0.08011030405759811, "learning_rate": 0.00011333333333333334, "loss": 0.062, "step": 50 }, { "epoch": 0.37037037037037035, "eval_loss": 0.06661392003297806, "eval_runtime": 12.7288, "eval_samples_per_second": 9.427, "eval_steps_per_second": 4.714, "step": 50 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9609617693958144.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }