{ "best_global_step": null, "best_metric": 1.7856266498565674, "best_model_checkpoint": "/home/haji80as/invariant-LM2/BioMistral-7B-merged-server-10000/checkpoint-10000", "epoch": null, "eval_steps": 500, "global_step": 0, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "eval_loss": 1.8743267059326172, "eval_model_preparation_time": 0.0252, "eval_runtime": 482.614, "eval_samples_per_second": 17.045, "eval_steps_per_second": 2.132, "step": 0 }, { "eval_loss": 1.8743267059326172, "eval_model_preparation_time": 0.0252, "eval_runtime": 482.614, "eval_samples_per_second": 17.045, "eval_steps_per_second": 2.132, "step": 0 }, { "eval_loss": 1.904608130455017, "eval_model_preparation_time": 0.0252, "eval_runtime": 473.697, "eval_samples_per_second": 17.366, "eval_steps_per_second": 2.172, "step": 0 }, { "eval_loss": 1.904608130455017, "eval_model_preparation_time": 0.0252, "eval_runtime": 473.697, "eval_samples_per_second": 17.366, "eval_steps_per_second": 2.172, "step": 0 }, { "eval_loss": 1.9130403995513916, "eval_model_preparation_time": 0.0252, "eval_runtime": 470.5904, "eval_samples_per_second": 17.48, "eval_steps_per_second": 2.187, "step": 0 }, { "eval_loss": 1.9130403995513916, "eval_model_preparation_time": 0.0252, "eval_runtime": 470.5904, "eval_samples_per_second": 17.48, "eval_steps_per_second": 2.187, "step": 0 }, { "eval_loss": 1.9170527458190918, "eval_model_preparation_time": 0.0252, "eval_runtime": 475.5854, "eval_samples_per_second": 17.297, "eval_steps_per_second": 2.164, "step": 0 }, { "eval_loss": 1.9170527458190918, "eval_model_preparation_time": 0.0252, "eval_runtime": 475.5854, "eval_samples_per_second": 17.297, "eval_steps_per_second": 2.164, "step": 0 }, { "eval_loss": 1.9460963010787964, "eval_model_preparation_time": 0.0252, "eval_runtime": 479.3938, "eval_samples_per_second": 17.159, "eval_steps_per_second": 2.146, "step": 0 }, { "eval_loss": 1.9460963010787964, "eval_model_preparation_time": 0.0252, "eval_runtime": 479.3938, "eval_samples_per_second": 17.159, "eval_steps_per_second": 2.146, "step": 0 }, { "eval_loss": 1.928282618522644, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2283, "eval_samples_per_second": 17.835, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.928282618522644, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2283, "eval_samples_per_second": 17.835, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.9074980020523071, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.1381, "eval_samples_per_second": 17.838, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.9074980020523071, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.1381, "eval_samples_per_second": 17.838, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.9039336442947388, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.138, "eval_samples_per_second": 17.838, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.9039336442947388, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.138, "eval_samples_per_second": 17.838, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.8650752305984497, "eval_model_preparation_time": 0.0252, "eval_runtime": 465.4106, "eval_samples_per_second": 17.675, "eval_steps_per_second": 2.211, "step": 0 }, { "eval_loss": 1.8650752305984497, "eval_model_preparation_time": 0.0252, "eval_runtime": 465.4106, "eval_samples_per_second": 17.675, "eval_steps_per_second": 2.211, "step": 0 }, { "eval_loss": 1.841698408126831, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.0916, "eval_samples_per_second": 17.84, "eval_steps_per_second": 2.232, "step": 0 }, { "eval_loss": 1.841698408126831, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.0916, "eval_samples_per_second": 17.84, "eval_steps_per_second": 2.232, "step": 0 }, { "eval_loss": 1.8204604387283325, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2365, "eval_samples_per_second": 17.835, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.8204604387283325, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2365, "eval_samples_per_second": 17.835, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.8339213132858276, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.1973, "eval_samples_per_second": 17.836, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.8339213132858276, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.1973, "eval_samples_per_second": 17.836, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.8116841316223145, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.3888, "eval_samples_per_second": 17.829, "eval_steps_per_second": 2.23, "step": 0 }, { "eval_loss": 1.8116841316223145, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.3888, "eval_samples_per_second": 17.829, "eval_steps_per_second": 2.23, "step": 0 }, { "eval_loss": 1.8126027584075928, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2044, "eval_samples_per_second": 17.836, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.8126027584075928, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2044, "eval_samples_per_second": 17.836, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.7994815111160278, "eval_model_preparation_time": 0.0252, "eval_runtime": 464.7739, "eval_samples_per_second": 17.699, "eval_steps_per_second": 2.214, "step": 0 }, { "eval_loss": 1.7994815111160278, "eval_model_preparation_time": 0.0252, "eval_runtime": 464.7739, "eval_samples_per_second": 17.699, "eval_steps_per_second": 2.214, "step": 0 }, { "eval_loss": 1.7974251508712769, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2176, "eval_samples_per_second": 17.835, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.7974251508712769, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2176, "eval_samples_per_second": 17.835, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.7886863946914673, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2775, "eval_samples_per_second": 17.833, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.7886863946914673, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2775, "eval_samples_per_second": 17.833, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.7869404554367065, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2402, "eval_samples_per_second": 17.835, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.7869404554367065, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.2402, "eval_samples_per_second": 17.835, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.7859430313110352, "eval_model_preparation_time": 0.0252, "eval_runtime": 483.5151, "eval_samples_per_second": 17.013, "eval_steps_per_second": 2.128, "step": 0 }, { "eval_loss": 1.7859430313110352, "eval_model_preparation_time": 0.0252, "eval_runtime": 483.5151, "eval_samples_per_second": 17.013, "eval_steps_per_second": 2.128, "step": 0 }, { "eval_loss": 1.7856266498565674, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.1594, "eval_samples_per_second": 17.838, "eval_steps_per_second": 2.231, "step": 0 }, { "eval_loss": 1.7856266498565674, "eval_model_preparation_time": 0.0252, "eval_runtime": 461.1594, "eval_samples_per_second": 17.838, "eval_steps_per_second": 2.231, "step": 0 } ], "logging_steps": 500, "max_steps": 0, "num_input_tokens_seen": 0, "num_train_epochs": 0, "save_steps": 500, "stateful_callbacks": {}, "total_flos": 0, "train_batch_size": null, "trial_name": null, "trial_params": null }