{ "best_global_step": 1628, "best_metric": 0.964, "best_model_checkpoint": "model_tasks/checkpoint-1628", "epoch": 4.0, "eval_steps": 500, "global_step": 1628, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12285012285012285, "grad_norm": 58.800106048583984, "learning_rate": 4.803921568627452e-06, "loss": 13.935902099609375, "step": 50 }, { "epoch": 0.2457002457002457, "grad_norm": 20.866506576538086, "learning_rate": 9.705882352941177e-06, "loss": 2.0049127197265624, "step": 100 }, { "epoch": 0.36855036855036855, "grad_norm": 47.32191467285156, "learning_rate": 1.4607843137254903e-05, "loss": 1.2294308471679687, "step": 150 }, { "epoch": 0.4914004914004914, "grad_norm": 22.775053024291992, "learning_rate": 1.950980392156863e-05, "loss": 0.9614788818359375, "step": 200 }, { "epoch": 0.6142506142506142, "grad_norm": 20.26030731201172, "learning_rate": 1.9970207816945425e-05, "loss": 0.7626658630371094, "step": 250 }, { "epoch": 0.7371007371007371, "grad_norm": 15.181203842163086, "learning_rate": 1.9867450374363293e-05, "loss": 0.7171074676513672, "step": 300 }, { "epoch": 0.85995085995086, "grad_norm": 30.844865798950195, "learning_rate": 1.9692115492385916e-05, "loss": 0.6501062774658203, "step": 350 }, { "epoch": 0.9828009828009828, "grad_norm": 9.524636268615723, "learning_rate": 1.944549280068335e-05, "loss": 0.5243024063110352, "step": 400 }, { "epoch": 1.0, "eval_cls_accuracy": 0.902, "eval_cls_f1": 0.9019525450317953, "eval_loss": 0.48792028427124023, "eval_runtime": 2.3223, "eval_samples_per_second": 215.305, "eval_sim_mae": 0.366321861743927, "eval_steps_per_second": 6.89, "step": 407 }, { "epoch": 1.1056511056511056, "grad_norm": 12.419134140014648, "learning_rate": 1.9129396267685277e-05, "loss": 0.41418304443359377, "step": 450 }, { "epoch": 1.2285012285012284, "grad_norm": 18.077287673950195, "learning_rate": 1.874615085841268e-05, "loss": 0.3944705581665039, "step": 500 }, { "epoch": 1.3513513513513513, "grad_norm": 8.001004219055176, "learning_rate": 1.829857543380833e-05, "loss": 0.42740669250488283, "step": 550 }, { "epoch": 1.4742014742014742, "grad_norm": 20.04601287841797, "learning_rate": 1.7789962017345527e-05, "loss": 0.389661865234375, "step": 600 }, { "epoch": 1.597051597051597, "grad_norm": 6.977843284606934, "learning_rate": 1.7224051581414144e-05, "loss": 0.38964271545410156, "step": 650 }, { "epoch": 1.71990171990172, "grad_norm": 7.663310527801514, "learning_rate": 1.6605006531580833e-05, "loss": 0.35614356994628904, "step": 700 }, { "epoch": 1.8427518427518428, "grad_norm": 12.420454978942871, "learning_rate": 1.593738009110821e-05, "loss": 0.320499153137207, "step": 750 }, { "epoch": 1.9656019656019657, "grad_norm": 13.657429695129395, "learning_rate": 1.5226082810917096e-05, "loss": 0.3286134719848633, "step": 800 }, { "epoch": 2.0, "eval_cls_accuracy": 0.946, "eval_cls_f1": 0.9459513562205986, "eval_loss": 0.31798821687698364, "eval_runtime": 2.3034, "eval_samples_per_second": 217.067, "eval_sim_mae": 0.2837124764919281, "eval_steps_per_second": 6.946, "step": 814 }, { "epoch": 2.0884520884520885, "grad_norm": 3.6505086421966553, "learning_rate": 1.4476346451319078e-05, "loss": 0.2602015495300293, "step": 850 }, { "epoch": 2.211302211302211, "grad_norm": 7.000202655792236, "learning_rate": 1.3693685501177709e-05, "loss": 0.20756610870361328, "step": 900 }, { "epoch": 2.3341523341523343, "grad_norm": 8.487826347351074, "learning_rate": 1.2883856617534158e-05, "loss": 0.2469894790649414, "step": 950 }, { "epoch": 2.457002457002457, "grad_norm": 13.440855979919434, "learning_rate": 1.2052816284028395e-05, "loss": 0.20867227554321288, "step": 1000 }, { "epoch": 2.57985257985258, "grad_norm": 7.461733341217041, "learning_rate": 1.1206676999548351e-05, "loss": 0.21205909729003905, "step": 1050 }, { "epoch": 2.7027027027027026, "grad_norm": 15.598981857299805, "learning_rate": 1.0351662319349887e-05, "loss": 0.21189189910888673, "step": 1100 }, { "epoch": 2.8255528255528253, "grad_norm": 5.610878944396973, "learning_rate": 9.494061079330812e-06, "loss": 0.207319278717041, "step": 1150 }, { "epoch": 2.9484029484029484, "grad_norm": 11.040838241577148, "learning_rate": 8.640181140150297e-06, "loss": 0.17402687072753906, "step": 1200 }, { "epoch": 3.0, "eval_cls_accuracy": 0.958, "eval_cls_f1": 0.9579917663862118, "eval_loss": 0.28256744146347046, "eval_runtime": 2.3027, "eval_samples_per_second": 217.134, "eval_sim_mae": 0.25911375880241394, "eval_steps_per_second": 6.948, "step": 1221 }, { "epoch": 3.0712530712530715, "grad_norm": 6.649824619293213, "learning_rate": 7.79630299141665e-06, "loss": 0.1433491039276123, "step": 1250 }, { "epoch": 3.194103194103194, "grad_norm": 4.330808639526367, "learning_rate": 6.968633557195666e-06, "loss": 0.11451138496398926, "step": 1300 }, { "epoch": 3.3169533169533167, "grad_norm": 25.754131317138672, "learning_rate": 6.163260542611005e-06, "loss": 0.12067122459411621, "step": 1350 }, { "epoch": 3.43980343980344, "grad_norm": 3.0961501598358154, "learning_rate": 5.386107657328183e-06, "loss": 0.09894429206848145, "step": 1400 }, { "epoch": 3.562653562653563, "grad_norm": 2.771333932876587, "learning_rate": 4.642891045264073e-06, "loss": 0.08938132286071777, "step": 1450 }, { "epoch": 3.6855036855036856, "grad_norm": 3.6692490577697754, "learning_rate": 3.939077240991749e-06, "loss": 0.10362943649291992, "step": 1500 }, { "epoch": 3.808353808353808, "grad_norm": 2.1008787155151367, "learning_rate": 3.279842962081324e-06, "loss": 0.09946866989135743, "step": 1550 }, { "epoch": 3.9312039312039313, "grad_norm": 1.3774856328964233, "learning_rate": 2.670037033113577e-06, "loss": 0.13387378692626953, "step": 1600 }, { "epoch": 4.0, "eval_cls_accuracy": 0.964, "eval_cls_f1": 0.964, "eval_loss": 0.25317928194999695, "eval_runtime": 2.305, "eval_samples_per_second": 216.923, "eval_sim_mae": 0.2385944277048111, "eval_steps_per_second": 6.942, "step": 1628 } ], "logging_steps": 50, "max_steps": 2035, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }