{ "best_global_step": null, "best_metric": 7.719013214111328, "best_model_checkpoint": "/home/haji80as/invariant-LM/Qwen2-1.5B-merged-server-20000-rank-128/checkpoint-4500", "epoch": null, "eval_steps": 500, "global_step": 0, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "eval_loss": 10.818012237548828, "eval_model_preparation_time": 0.0227, "eval_runtime": 251.0603, "eval_samples_per_second": 24.512, "eval_steps_per_second": 3.067, "step": 0 }, { "eval_loss": 10.818012237548828, "eval_model_preparation_time": 0.0227, "eval_runtime": 251.0603, "eval_samples_per_second": 24.512, "eval_steps_per_second": 3.067, "step": 0 }, { "eval_loss": 10.100893020629883, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.5086, "eval_samples_per_second": 24.566, "eval_steps_per_second": 3.074, "step": 0 }, { "eval_loss": 10.100893020629883, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.5086, "eval_samples_per_second": 24.566, "eval_steps_per_second": 3.074, "step": 0 }, { "eval_loss": 9.53693675994873, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.8052, "eval_samples_per_second": 24.537, "eval_steps_per_second": 3.07, "step": 0 }, { "eval_loss": 9.53693675994873, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.8052, "eval_samples_per_second": 24.537, "eval_steps_per_second": 3.07, "step": 0 }, { "eval_loss": 9.094352722167969, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.92, "eval_samples_per_second": 24.526, "eval_steps_per_second": 3.069, "step": 0 }, { "eval_loss": 9.094352722167969, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.92, "eval_samples_per_second": 24.526, "eval_steps_per_second": 3.069, "step": 0 }, { "eval_loss": 8.673422813415527, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.8965, "eval_samples_per_second": 24.528, "eval_steps_per_second": 3.069, "step": 0 }, { "eval_loss": 8.673422813415527, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.8965, "eval_samples_per_second": 24.528, "eval_steps_per_second": 3.069, "step": 0 }, { "eval_loss": 8.244048118591309, "eval_model_preparation_time": 0.0227, "eval_runtime": 262.4664, "eval_samples_per_second": 23.447, "eval_steps_per_second": 2.934, "step": 0 }, { "eval_loss": 8.244048118591309, "eval_model_preparation_time": 0.0227, "eval_runtime": 262.4664, "eval_samples_per_second": 23.447, "eval_steps_per_second": 2.934, "step": 0 }, { "eval_loss": 7.927587985992432, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.5702, "eval_samples_per_second": 24.56, "eval_steps_per_second": 3.073, "step": 0 }, { "eval_loss": 7.927587985992432, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.5702, "eval_samples_per_second": 24.56, "eval_steps_per_second": 3.073, "step": 0 }, { "eval_loss": 7.749431133270264, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.1536, "eval_samples_per_second": 24.601, "eval_steps_per_second": 3.078, "step": 0 }, { "eval_loss": 7.749431133270264, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.1536, "eval_samples_per_second": 24.601, "eval_steps_per_second": 3.078, "step": 0 }, { "eval_loss": 7.719013214111328, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.8627, "eval_samples_per_second": 24.531, "eval_steps_per_second": 3.069, "step": 0 }, { "eval_loss": 7.719013214111328, "eval_model_preparation_time": 0.0227, "eval_runtime": 250.8627, "eval_samples_per_second": 24.531, "eval_steps_per_second": 3.069, "step": 0 } ], "logging_steps": 500, "max_steps": 0, "num_input_tokens_seen": 0, "num_train_epochs": 0, "save_steps": 500, "stateful_callbacks": {}, "total_flos": 0, "train_batch_size": null, "trial_name": null, "trial_params": null }