Instructions to use Arthur-Tsai/histv4_ftis_pretrain_tssp-smlm_0329 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Arthur-Tsai/histv4_ftis_pretrain_tssp-smlm_0329 with Transformers:
# Load model directly from transformers import HiSenTrans model = HiSenTrans.from_pretrained("Arthur-Tsai/histv4_ftis_pretrain_tssp-smlm_0329", dtype="auto") - Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 0.7087897186605305, | |
| "best_model_checkpoint": "histv4_ftis_pretrain_tssp-smlm_0329/checkpoint-20800", | |
| "epoch": 305.0004460966543, | |
| "eval_steps": 100, | |
| "global_step": 20800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0003717472118959108, | |
| "grad_norm": 266.68756103515625, | |
| "learning_rate": 7.434944237918216e-07, | |
| "loss": 46.6745, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.0002379182156134, | |
| "grad_norm": 117.62641906738281, | |
| "learning_rate": 1.4869888475836432e-06, | |
| "loss": 38.9524, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.0002379182156134, | |
| "eval_accuracy": 0.050442267150417074, | |
| "eval_loss": 25.470186233520508, | |
| "eval_macro_f1": 0.025968145861855703, | |
| "eval_runtime": 63.0308, | |
| "eval_samples_per_second": 10.661, | |
| "eval_steps_per_second": 2.665, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.000104089219331, | |
| "grad_norm": 205.3363494873047, | |
| "learning_rate": 2.2304832713754648e-06, | |
| "loss": 32.2356, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.0004758364312267, | |
| "grad_norm": 93.07455444335938, | |
| "learning_rate": 2.9739776951672864e-06, | |
| "loss": 23.6969, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.0004758364312267, | |
| "eval_accuracy": 0.06100102460059542, | |
| "eval_loss": 14.61188793182373, | |
| "eval_macro_f1": 0.02836623982308631, | |
| "eval_runtime": 58.7907, | |
| "eval_samples_per_second": 11.43, | |
| "eval_steps_per_second": 2.858, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.000342007434944, | |
| "grad_norm": 26.653369903564453, | |
| "learning_rate": 3.717472118959108e-06, | |
| "loss": 15.7121, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 4.000208178438662, | |
| "grad_norm": 12.779120445251465, | |
| "learning_rate": 4.4609665427509296e-06, | |
| "loss": 11.4531, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.000208178438662, | |
| "eval_accuracy": 0.2494337794862716, | |
| "eval_loss": 8.398870468139648, | |
| "eval_macro_f1": 0.07975966315085589, | |
| "eval_runtime": 57.7876, | |
| "eval_samples_per_second": 11.629, | |
| "eval_steps_per_second": 2.907, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 5.00007434944238, | |
| "grad_norm": 19.828210830688477, | |
| "learning_rate": 5.2044609665427516e-06, | |
| "loss": 9.2358, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 5.000446096654275, | |
| "grad_norm": 35.386112213134766, | |
| "learning_rate": 5.947955390334573e-06, | |
| "loss": 7.7719, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 5.000446096654275, | |
| "eval_accuracy": 0.5031238102652373, | |
| "eval_loss": 6.939528465270996, | |
| "eval_macro_f1": 0.13061780478609422, | |
| "eval_runtime": 55.3045, | |
| "eval_samples_per_second": 12.151, | |
| "eval_steps_per_second": 3.038, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 6.000312267657993, | |
| "grad_norm": 8.993448257446289, | |
| "learning_rate": 6.691449814126394e-06, | |
| "loss": 7.3923, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 7.00017843866171, | |
| "grad_norm": 41.571659088134766, | |
| "learning_rate": 7.434944237918216e-06, | |
| "loss": 6.7875, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 7.00017843866171, | |
| "eval_accuracy": 0.552664406739702, | |
| "eval_loss": 6.486644744873047, | |
| "eval_macro_f1": 0.1470886256918198, | |
| "eval_runtime": 54.4232, | |
| "eval_samples_per_second": 12.348, | |
| "eval_steps_per_second": 3.087, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 8.000044609665428, | |
| "grad_norm": 10.922592163085938, | |
| "learning_rate": 8.178438661710038e-06, | |
| "loss": 6.2835, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 8.000416356877324, | |
| "grad_norm": 8.742582321166992, | |
| "learning_rate": 8.921933085501859e-06, | |
| "loss": 6.2386, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 8.000416356877324, | |
| "eval_accuracy": 0.5771052858930745, | |
| "eval_loss": 5.778635025024414, | |
| "eval_macro_f1": 0.15409460900219704, | |
| "eval_runtime": 51.6608, | |
| "eval_samples_per_second": 13.008, | |
| "eval_steps_per_second": 3.252, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 9.00028252788104, | |
| "grad_norm": 7.5611162185668945, | |
| "learning_rate": 9.665427509293682e-06, | |
| "loss": 5.7251, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 10.00014869888476, | |
| "grad_norm": 19.309194564819336, | |
| "learning_rate": 1.0408921933085503e-05, | |
| "loss": 5.8795, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 10.00014869888476, | |
| "eval_accuracy": 0.5914996103544202, | |
| "eval_loss": 5.594468593597412, | |
| "eval_macro_f1": 0.15595900294660686, | |
| "eval_runtime": 50.9102, | |
| "eval_samples_per_second": 13.2, | |
| "eval_steps_per_second": 3.3, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 11.000014869888476, | |
| "grad_norm": 31.623323440551758, | |
| "learning_rate": 1.1152416356877324e-05, | |
| "loss": 5.5781, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 11.000386617100371, | |
| "grad_norm": 9.973241806030273, | |
| "learning_rate": 1.1895910780669145e-05, | |
| "loss": 5.4314, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 11.000386617100371, | |
| "eval_accuracy": 0.6039963812217861, | |
| "eval_loss": 4.915599822998047, | |
| "eval_macro_f1": 0.16604226857183932, | |
| "eval_runtime": 49.9902, | |
| "eval_samples_per_second": 13.443, | |
| "eval_steps_per_second": 3.361, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 12.000252788104088, | |
| "grad_norm": 7.87252950668335, | |
| "learning_rate": 1.2639405204460967e-05, | |
| "loss": 5.0735, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 13.000118959107807, | |
| "grad_norm": 6.907707691192627, | |
| "learning_rate": 1.3382899628252788e-05, | |
| "loss": 5.0565, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 13.000118959107807, | |
| "eval_accuracy": 0.6114972362936331, | |
| "eval_loss": 4.2589311599731445, | |
| "eval_macro_f1": 0.1720376175468991, | |
| "eval_runtime": 49.3118, | |
| "eval_samples_per_second": 13.628, | |
| "eval_steps_per_second": 3.407, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 13.000490706319702, | |
| "grad_norm": 6.981306076049805, | |
| "learning_rate": 1.412639405204461e-05, | |
| "loss": 4.8411, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 14.00035687732342, | |
| "grad_norm": 7.001990795135498, | |
| "learning_rate": 1.4869888475836432e-05, | |
| "loss": 4.6784, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 14.00035687732342, | |
| "eval_accuracy": 0.6227607074266499, | |
| "eval_loss": 4.0632429122924805, | |
| "eval_macro_f1": 0.17299715055148895, | |
| "eval_runtime": 48.9379, | |
| "eval_samples_per_second": 13.732, | |
| "eval_steps_per_second": 3.433, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 15.000223048327138, | |
| "grad_norm": 8.357354164123535, | |
| "learning_rate": 1.5613382899628255e-05, | |
| "loss": 4.4646, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 16.000089219330857, | |
| "grad_norm": 6.316117763519287, | |
| "learning_rate": 1.6356877323420076e-05, | |
| "loss": 4.3903, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 16.000089219330857, | |
| "eval_accuracy": 0.6337044380919039, | |
| "eval_loss": 3.4835870265960693, | |
| "eval_macro_f1": 0.18789200970805336, | |
| "eval_runtime": 50.0522, | |
| "eval_samples_per_second": 13.426, | |
| "eval_steps_per_second": 3.356, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 16.00046096654275, | |
| "grad_norm": 14.071303367614746, | |
| "learning_rate": 1.7100371747211897e-05, | |
| "loss": 4.2677, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 17.000327137546467, | |
| "grad_norm": 7.639101982116699, | |
| "learning_rate": 1.7843866171003718e-05, | |
| "loss": 3.7652, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 17.000327137546467, | |
| "eval_accuracy": 0.6313526240053495, | |
| "eval_loss": 3.313999652862549, | |
| "eval_macro_f1": 0.19168345755706534, | |
| "eval_runtime": 48.171, | |
| "eval_samples_per_second": 13.95, | |
| "eval_steps_per_second": 3.488, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 18.000193308550188, | |
| "grad_norm": 6.102409839630127, | |
| "learning_rate": 1.858736059479554e-05, | |
| "loss": 3.7993, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 19.000059479553904, | |
| "grad_norm": 5.737588882446289, | |
| "learning_rate": 1.9330855018587364e-05, | |
| "loss": 3.5554, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 19.000059479553904, | |
| "eval_accuracy": 0.6518905281527871, | |
| "eval_loss": 2.997008800506592, | |
| "eval_macro_f1": 0.19591256463059686, | |
| "eval_runtime": 48.2904, | |
| "eval_samples_per_second": 13.916, | |
| "eval_steps_per_second": 3.479, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 19.000431226765798, | |
| "grad_norm": 8.27597713470459, | |
| "learning_rate": 2.0074349442379185e-05, | |
| "loss": 3.5706, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 20.00029739776952, | |
| "grad_norm": 7.908275127410889, | |
| "learning_rate": 2.0817843866171006e-05, | |
| "loss": 3.2164, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 20.00029739776952, | |
| "eval_accuracy": 0.6618917672038582, | |
| "eval_loss": 2.7557449340820312, | |
| "eval_macro_f1": 0.20366848025691103, | |
| "eval_runtime": 48.1076, | |
| "eval_samples_per_second": 13.969, | |
| "eval_steps_per_second": 3.492, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 21.000163568773235, | |
| "grad_norm": 10.48082447052002, | |
| "learning_rate": 2.1561338289962827e-05, | |
| "loss": 2.9457, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 22.000029739776952, | |
| "grad_norm": 3.876420497894287, | |
| "learning_rate": 2.230483271375465e-05, | |
| "loss": 2.9022, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 22.000029739776952, | |
| "eval_accuracy": 0.6745563484687298, | |
| "eval_loss": 2.527372360229492, | |
| "eval_macro_f1": 0.2157652452048655, | |
| "eval_runtime": 48.1074, | |
| "eval_samples_per_second": 13.969, | |
| "eval_steps_per_second": 3.492, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 22.00040148698885, | |
| "grad_norm": 7.9988932609558105, | |
| "learning_rate": 2.304832713754647e-05, | |
| "loss": 2.8802, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 23.000267657992566, | |
| "grad_norm": 12.270583152770996, | |
| "learning_rate": 2.379182156133829e-05, | |
| "loss": 2.745, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 23.000267657992566, | |
| "eval_accuracy": 0.6913894280781943, | |
| "eval_loss": 2.355086088180542, | |
| "eval_macro_f1": 0.23149517864191574, | |
| "eval_runtime": 48.147, | |
| "eval_samples_per_second": 13.957, | |
| "eval_steps_per_second": 3.489, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 24.000133828996283, | |
| "grad_norm": 5.4861040115356445, | |
| "learning_rate": 2.4535315985130116e-05, | |
| "loss": 2.6132, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 24.000505576208177, | |
| "grad_norm": 6.245904445648193, | |
| "learning_rate": 2.5278810408921933e-05, | |
| "loss": 2.4721, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 24.000505576208177, | |
| "eval_accuracy": 0.6931836196159022, | |
| "eval_loss": 2.360691547393799, | |
| "eval_macro_f1": 0.24161708403963666, | |
| "eval_runtime": 46.7909, | |
| "eval_samples_per_second": 14.362, | |
| "eval_steps_per_second": 3.59, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 25.000371747211897, | |
| "grad_norm": 3.8083865642547607, | |
| "learning_rate": 2.6022304832713758e-05, | |
| "loss": 2.4046, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 26.000237918215614, | |
| "grad_norm": 10.2521333694458, | |
| "learning_rate": 2.6765799256505576e-05, | |
| "loss": 2.3065, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 26.000237918215614, | |
| "eval_accuracy": 0.7109710905521758, | |
| "eval_loss": 2.1845011711120605, | |
| "eval_macro_f1": 0.25447139713367894, | |
| "eval_runtime": 48.4023, | |
| "eval_samples_per_second": 13.884, | |
| "eval_steps_per_second": 3.471, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 27.00010408921933, | |
| "grad_norm": 4.3791022300720215, | |
| "learning_rate": 2.75092936802974e-05, | |
| "loss": 2.328, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 27.000475836431228, | |
| "grad_norm": 5.141542434692383, | |
| "learning_rate": 2.825278810408922e-05, | |
| "loss": 2.3132, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 27.000475836431228, | |
| "eval_accuracy": 0.7144459720258327, | |
| "eval_loss": 2.155479907989502, | |
| "eval_macro_f1": 0.25474356578920043, | |
| "eval_runtime": 48.1297, | |
| "eval_samples_per_second": 13.962, | |
| "eval_steps_per_second": 3.491, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 28.000342007434945, | |
| "grad_norm": 7.797635078430176, | |
| "learning_rate": 2.8996282527881043e-05, | |
| "loss": 2.1353, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 29.000208178438662, | |
| "grad_norm": 6.269540309906006, | |
| "learning_rate": 2.9739776951672864e-05, | |
| "loss": 2.069, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 29.000208178438662, | |
| "eval_accuracy": 0.7228523113819296, | |
| "eval_loss": 2.0353355407714844, | |
| "eval_macro_f1": 0.2780559155369861, | |
| "eval_runtime": 48.912, | |
| "eval_samples_per_second": 13.739, | |
| "eval_steps_per_second": 3.435, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 30.00007434944238, | |
| "grad_norm": 7.6095099449157715, | |
| "learning_rate": 3.0483271375464685e-05, | |
| "loss": 2.0509, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 30.000446096654276, | |
| "grad_norm": 7.309573650360107, | |
| "learning_rate": 3.122676579925651e-05, | |
| "loss": 2.065, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 30.000446096654276, | |
| "eval_accuracy": 0.7238860947101218, | |
| "eval_loss": 2.0333032608032227, | |
| "eval_macro_f1": 0.2880652536535786, | |
| "eval_runtime": 48.1719, | |
| "eval_samples_per_second": 13.95, | |
| "eval_steps_per_second": 3.488, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 31.000312267657993, | |
| "grad_norm": 6.504179000854492, | |
| "learning_rate": 3.1970260223048324e-05, | |
| "loss": 1.9473, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 32.00017843866171, | |
| "grad_norm": 7.720032215118408, | |
| "learning_rate": 3.271375464684015e-05, | |
| "loss": 1.9441, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 32.00017843866171, | |
| "eval_accuracy": 0.7312337507081383, | |
| "eval_loss": 2.0020694732666016, | |
| "eval_macro_f1": 0.2978414990821893, | |
| "eval_runtime": 48.306, | |
| "eval_samples_per_second": 13.911, | |
| "eval_steps_per_second": 3.478, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 33.00004460966543, | |
| "grad_norm": 3.4699764251708984, | |
| "learning_rate": 3.345724907063197e-05, | |
| "loss": 1.8625, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 33.000416356877324, | |
| "grad_norm": 7.346961975097656, | |
| "learning_rate": 3.4200743494423794e-05, | |
| "loss": 1.8603, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 33.000416356877324, | |
| "eval_accuracy": 0.7178409171443214, | |
| "eval_loss": 2.070431709289551, | |
| "eval_macro_f1": 0.29655410586263253, | |
| "eval_runtime": 48.3374, | |
| "eval_samples_per_second": 13.902, | |
| "eval_steps_per_second": 3.476, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 34.00028252788104, | |
| "grad_norm": 7.2747015953063965, | |
| "learning_rate": 3.4944237918215615e-05, | |
| "loss": 1.8716, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 35.00014869888476, | |
| "grad_norm": 27.447582244873047, | |
| "learning_rate": 3.5687732342007436e-05, | |
| "loss": 1.8589, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 35.00014869888476, | |
| "eval_accuracy": 0.7422280762176736, | |
| "eval_loss": 1.863403081893921, | |
| "eval_macro_f1": 0.33096456369239546, | |
| "eval_runtime": 47.8041, | |
| "eval_samples_per_second": 14.057, | |
| "eval_steps_per_second": 3.514, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 36.000014869888474, | |
| "grad_norm": 4.940944194793701, | |
| "learning_rate": 3.643122676579926e-05, | |
| "loss": 1.7339, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 36.000386617100375, | |
| "grad_norm": 4.48024845123291, | |
| "learning_rate": 3.717472118959108e-05, | |
| "loss": 1.6732, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 36.000386617100375, | |
| "eval_accuracy": 0.7466635662811762, | |
| "eval_loss": 1.8566985130310059, | |
| "eval_macro_f1": 0.3421978183973209, | |
| "eval_runtime": 48.6552, | |
| "eval_samples_per_second": 13.811, | |
| "eval_steps_per_second": 3.453, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 37.00025278810409, | |
| "grad_norm": 5.992680072784424, | |
| "learning_rate": 3.79182156133829e-05, | |
| "loss": 1.6829, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 38.00011895910781, | |
| "grad_norm": 3.3229565620422363, | |
| "learning_rate": 3.866171003717473e-05, | |
| "loss": 1.6328, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 38.00011895910781, | |
| "eval_accuracy": 0.7481501704372262, | |
| "eval_loss": 1.8362157344818115, | |
| "eval_macro_f1": 0.3485086625465703, | |
| "eval_runtime": 48.0664, | |
| "eval_samples_per_second": 13.981, | |
| "eval_steps_per_second": 3.495, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 38.0004907063197, | |
| "grad_norm": 5.226800441741943, | |
| "learning_rate": 3.940520446096654e-05, | |
| "loss": 1.6336, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 39.00035687732342, | |
| "grad_norm": 4.662332057952881, | |
| "learning_rate": 4.014869888475837e-05, | |
| "loss": 1.5936, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 39.00035687732342, | |
| "eval_accuracy": 0.7517626786767897, | |
| "eval_loss": 1.7911369800567627, | |
| "eval_macro_f1": 0.37362545278695913, | |
| "eval_runtime": 48.5579, | |
| "eval_samples_per_second": 13.839, | |
| "eval_steps_per_second": 3.46, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 40.000223048327136, | |
| "grad_norm": 5.28360652923584, | |
| "learning_rate": 4.0892193308550185e-05, | |
| "loss": 1.5267, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 41.00008921933085, | |
| "grad_norm": 13.149215698242188, | |
| "learning_rate": 4.163568773234201e-05, | |
| "loss": 1.5476, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 41.00008921933085, | |
| "eval_accuracy": 0.743210771307319, | |
| "eval_loss": 1.9220155477523804, | |
| "eval_macro_f1": 0.3483976776394514, | |
| "eval_runtime": 46.7958, | |
| "eval_samples_per_second": 14.36, | |
| "eval_steps_per_second": 3.59, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 41.000460966542754, | |
| "grad_norm": 4.578529357910156, | |
| "learning_rate": 4.237918215613383e-05, | |
| "loss": 1.512, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 42.00032713754647, | |
| "grad_norm": 5.931751728057861, | |
| "learning_rate": 4.3122676579925655e-05, | |
| "loss": 1.4547, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 42.00032713754647, | |
| "eval_accuracy": 0.7573206038563081, | |
| "eval_loss": 1.7732130289077759, | |
| "eval_macro_f1": 0.3823146615563383, | |
| "eval_runtime": 47.9976, | |
| "eval_samples_per_second": 14.001, | |
| "eval_steps_per_second": 3.5, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 43.00019330855019, | |
| "grad_norm": 6.435998916625977, | |
| "learning_rate": 4.3866171003717476e-05, | |
| "loss": 1.4742, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 44.000059479553904, | |
| "grad_norm": 5.018718242645264, | |
| "learning_rate": 4.46096654275093e-05, | |
| "loss": 1.4111, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 44.000059479553904, | |
| "eval_accuracy": 0.7604534766737875, | |
| "eval_loss": 1.8028564453125, | |
| "eval_macro_f1": 0.3851260308276024, | |
| "eval_runtime": 47.8675, | |
| "eval_samples_per_second": 14.039, | |
| "eval_steps_per_second": 3.51, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 44.0004312267658, | |
| "grad_norm": 6.0326948165893555, | |
| "learning_rate": 4.535315985130112e-05, | |
| "loss": 1.4035, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 45.000297397769515, | |
| "grad_norm": 6.028076171875, | |
| "learning_rate": 4.609665427509294e-05, | |
| "loss": 1.3497, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 45.000297397769515, | |
| "eval_accuracy": 0.768372023656696, | |
| "eval_loss": 1.7707154750823975, | |
| "eval_macro_f1": 0.4061185384890694, | |
| "eval_runtime": 47.5778, | |
| "eval_samples_per_second": 14.124, | |
| "eval_steps_per_second": 3.531, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 46.00016356877323, | |
| "grad_norm": 4.857266426086426, | |
| "learning_rate": 4.684014869888476e-05, | |
| "loss": 1.3224, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 47.00002973977695, | |
| "grad_norm": 4.493041038513184, | |
| "learning_rate": 4.758364312267658e-05, | |
| "loss": 1.3677, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 47.00002973977695, | |
| "eval_accuracy": 0.7547429832358992, | |
| "eval_loss": 1.8069781064987183, | |
| "eval_macro_f1": 0.4303426673990079, | |
| "eval_runtime": 47.9766, | |
| "eval_samples_per_second": 14.007, | |
| "eval_steps_per_second": 3.502, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 47.00040148698885, | |
| "grad_norm": 5.508673191070557, | |
| "learning_rate": 4.83271375464684e-05, | |
| "loss": 1.2983, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 48.000267657992566, | |
| "grad_norm": 2.9697608947753906, | |
| "learning_rate": 4.907063197026023e-05, | |
| "loss": 1.2558, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 48.000267657992566, | |
| "eval_accuracy": 0.770120629172458, | |
| "eval_loss": 1.7805372476577759, | |
| "eval_macro_f1": 0.4152670510054953, | |
| "eval_runtime": 47.6206, | |
| "eval_samples_per_second": 14.112, | |
| "eval_steps_per_second": 3.528, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 49.00013382899628, | |
| "grad_norm": 3.1506409645080566, | |
| "learning_rate": 4.9814126394052045e-05, | |
| "loss": 1.2456, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 49.00050557620818, | |
| "grad_norm": 2.9008400440216064, | |
| "learning_rate": 5.0557620817843867e-05, | |
| "loss": 1.2228, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 49.00050557620818, | |
| "eval_accuracy": 0.7753736194741223, | |
| "eval_loss": 1.6754295825958252, | |
| "eval_macro_f1": 0.4497377753131999, | |
| "eval_runtime": 46.8789, | |
| "eval_samples_per_second": 14.335, | |
| "eval_steps_per_second": 3.584, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 50.000371747211894, | |
| "grad_norm": 14.068544387817383, | |
| "learning_rate": 5.130111524163569e-05, | |
| "loss": 1.2156, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 51.00023791821561, | |
| "grad_norm": 7.377778053283691, | |
| "learning_rate": 5.2044609665427516e-05, | |
| "loss": 1.1783, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 51.00023791821561, | |
| "eval_accuracy": 0.7714801943968466, | |
| "eval_loss": 1.7035390138626099, | |
| "eval_macro_f1": 0.43929205620995815, | |
| "eval_runtime": 47.4365, | |
| "eval_samples_per_second": 14.166, | |
| "eval_steps_per_second": 3.542, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 52.00010408921933, | |
| "grad_norm": 5.86587381362915, | |
| "learning_rate": 5.278810408921934e-05, | |
| "loss": 1.1385, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 52.00047583643123, | |
| "grad_norm": 7.151806831359863, | |
| "learning_rate": 5.353159851301115e-05, | |
| "loss": 1.1714, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 52.00047583643123, | |
| "eval_accuracy": 0.776484537461446, | |
| "eval_loss": 1.6419715881347656, | |
| "eval_macro_f1": 0.46826783464883714, | |
| "eval_runtime": 47.8203, | |
| "eval_samples_per_second": 14.053, | |
| "eval_steps_per_second": 3.513, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 53.000342007434945, | |
| "grad_norm": 3.089052677154541, | |
| "learning_rate": 5.427509293680297e-05, | |
| "loss": 1.1051, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 54.00020817843866, | |
| "grad_norm": 4.17380428314209, | |
| "learning_rate": 5.50185873605948e-05, | |
| "loss": 1.1496, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 54.00020817843866, | |
| "eval_accuracy": 0.783282215260751, | |
| "eval_loss": 1.6009899377822876, | |
| "eval_macro_f1": 0.4684935245173467, | |
| "eval_runtime": 47.1333, | |
| "eval_samples_per_second": 14.257, | |
| "eval_steps_per_second": 3.564, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 55.00007434944238, | |
| "grad_norm": 3.879401683807373, | |
| "learning_rate": 5.576208178438662e-05, | |
| "loss": 1.123, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 55.00044609665427, | |
| "grad_norm": 5.521020889282227, | |
| "learning_rate": 5.650557620817844e-05, | |
| "loss": 1.0547, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 55.00044609665427, | |
| "eval_accuracy": 0.7832706110213381, | |
| "eval_loss": 1.6715576648712158, | |
| "eval_macro_f1": 0.46885622369709506, | |
| "eval_runtime": 47.4503, | |
| "eval_samples_per_second": 14.162, | |
| "eval_steps_per_second": 3.541, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 56.00031226765799, | |
| "grad_norm": 6.454986095428467, | |
| "learning_rate": 5.724907063197026e-05, | |
| "loss": 1.0531, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 57.00017843866171, | |
| "grad_norm": 9.67890453338623, | |
| "learning_rate": 5.7992565055762085e-05, | |
| "loss": 1.0515, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 57.00017843866171, | |
| "eval_accuracy": 0.787819132865917, | |
| "eval_loss": 1.6513065099716187, | |
| "eval_macro_f1": 0.4838647951545401, | |
| "eval_runtime": 47.7738, | |
| "eval_samples_per_second": 14.066, | |
| "eval_steps_per_second": 3.517, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 58.00004460966543, | |
| "grad_norm": 9.32218074798584, | |
| "learning_rate": 5.8736059479553906e-05, | |
| "loss": 1.0074, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 58.000416356877324, | |
| "grad_norm": 3.381666898727417, | |
| "learning_rate": 5.947955390334573e-05, | |
| "loss": 1.0019, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 58.000416356877324, | |
| "eval_accuracy": 0.7864938662716643, | |
| "eval_loss": 1.6623215675354004, | |
| "eval_macro_f1": 0.4932068292621268, | |
| "eval_runtime": 47.6569, | |
| "eval_samples_per_second": 14.101, | |
| "eval_steps_per_second": 3.525, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 59.00028252788104, | |
| "grad_norm": 5.850093364715576, | |
| "learning_rate": 6.0223048327137555e-05, | |
| "loss": 1.0, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 60.00014869888476, | |
| "grad_norm": 3.286019802093506, | |
| "learning_rate": 6.096654275092937e-05, | |
| "loss": 0.9517, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 60.00014869888476, | |
| "eval_accuracy": 0.788711438830616, | |
| "eval_loss": 1.5527822971343994, | |
| "eval_macro_f1": 0.4998529689254287, | |
| "eval_runtime": 47.1648, | |
| "eval_samples_per_second": 14.248, | |
| "eval_steps_per_second": 3.562, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 61.000014869888474, | |
| "grad_norm": 5.813722133636475, | |
| "learning_rate": 6.171003717472119e-05, | |
| "loss": 0.95, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 61.000386617100375, | |
| "grad_norm": 6.047854423522949, | |
| "learning_rate": 6.245353159851302e-05, | |
| "loss": 0.9249, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 61.000386617100375, | |
| "eval_accuracy": 0.7949442146268098, | |
| "eval_loss": 1.552842617034912, | |
| "eval_macro_f1": 0.5062827822451884, | |
| "eval_runtime": 47.7291, | |
| "eval_samples_per_second": 14.079, | |
| "eval_steps_per_second": 3.52, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 62.00025278810409, | |
| "grad_norm": 7.298818588256836, | |
| "learning_rate": 6.319702602230483e-05, | |
| "loss": 0.9241, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 63.00011895910781, | |
| "grad_norm": 3.9685351848602295, | |
| "learning_rate": 6.394052044609665e-05, | |
| "loss": 0.9072, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 63.00011895910781, | |
| "eval_accuracy": 0.7972720697174538, | |
| "eval_loss": 1.595497488975525, | |
| "eval_macro_f1": 0.5229591985542493, | |
| "eval_runtime": 47.7421, | |
| "eval_samples_per_second": 14.076, | |
| "eval_steps_per_second": 3.519, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 63.0004907063197, | |
| "grad_norm": 6.513078689575195, | |
| "learning_rate": 6.468401486988848e-05, | |
| "loss": 0.9064, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 64.00035687732343, | |
| "grad_norm": 9.337983131408691, | |
| "learning_rate": 6.54275092936803e-05, | |
| "loss": 0.8861, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 64.00035687732343, | |
| "eval_accuracy": 0.8012236394427585, | |
| "eval_loss": 1.577151894569397, | |
| "eval_macro_f1": 0.5266623902892644, | |
| "eval_runtime": 47.7981, | |
| "eval_samples_per_second": 14.059, | |
| "eval_steps_per_second": 3.515, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 65.00022304832714, | |
| "grad_norm": 6.658684730529785, | |
| "learning_rate": 6.617100371747212e-05, | |
| "loss": 0.8829, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 66.00008921933086, | |
| "grad_norm": 7.026724815368652, | |
| "learning_rate": 6.691449814126395e-05, | |
| "loss": 0.8687, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 66.00008921933086, | |
| "eval_accuracy": 0.8062816088160649, | |
| "eval_loss": 1.5187242031097412, | |
| "eval_macro_f1": 0.5236433362876899, | |
| "eval_runtime": 47.5759, | |
| "eval_samples_per_second": 14.125, | |
| "eval_steps_per_second": 3.531, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 66.00046096654275, | |
| "grad_norm": 3.1086082458496094, | |
| "learning_rate": 6.765799256505576e-05, | |
| "loss": 0.8541, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 67.00032713754646, | |
| "grad_norm": 2.039297342300415, | |
| "learning_rate": 6.840148698884759e-05, | |
| "loss": 0.8164, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 67.00032713754646, | |
| "eval_accuracy": 0.8023215844672498, | |
| "eval_loss": 1.605806589126587, | |
| "eval_macro_f1": 0.5408551755484401, | |
| "eval_runtime": 47.4302, | |
| "eval_samples_per_second": 14.168, | |
| "eval_steps_per_second": 3.542, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 68.00019330855018, | |
| "grad_norm": 5.141190528869629, | |
| "learning_rate": 6.91449814126394e-05, | |
| "loss": 0.8411, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 69.0000594795539, | |
| "grad_norm": 6.119557857513428, | |
| "learning_rate": 6.988847583643123e-05, | |
| "loss": 0.7923, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 69.0000594795539, | |
| "eval_accuracy": 0.7978375042402981, | |
| "eval_loss": 1.5691826343536377, | |
| "eval_macro_f1": 0.541571467839333, | |
| "eval_runtime": 47.0961, | |
| "eval_samples_per_second": 14.269, | |
| "eval_steps_per_second": 3.567, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 69.0004312267658, | |
| "grad_norm": 5.380624294281006, | |
| "learning_rate": 7.063197026022306e-05, | |
| "loss": 0.8052, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 70.00029739776951, | |
| "grad_norm": 7.781818389892578, | |
| "learning_rate": 7.137546468401487e-05, | |
| "loss": 0.8065, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 70.00029739776951, | |
| "eval_accuracy": 0.7904748964659136, | |
| "eval_loss": 1.6525987386703491, | |
| "eval_macro_f1": 0.5321248674751812, | |
| "eval_runtime": 47.8684, | |
| "eval_samples_per_second": 14.038, | |
| "eval_steps_per_second": 3.51, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 71.00016356877323, | |
| "grad_norm": 2.671602249145508, | |
| "learning_rate": 7.211895910780669e-05, | |
| "loss": 0.7815, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 72.00002973977695, | |
| "grad_norm": 4.526907444000244, | |
| "learning_rate": 7.286245353159852e-05, | |
| "loss": 0.771, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 72.00002973977695, | |
| "eval_accuracy": 0.7965682882088077, | |
| "eval_loss": 1.6163409948349, | |
| "eval_macro_f1": 0.5339910056580364, | |
| "eval_runtime": 47.2867, | |
| "eval_samples_per_second": 14.211, | |
| "eval_steps_per_second": 3.553, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 72.00040148698885, | |
| "grad_norm": 4.758114337921143, | |
| "learning_rate": 7.360594795539034e-05, | |
| "loss": 0.7414, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 73.00026765799257, | |
| "grad_norm": 4.144097805023193, | |
| "learning_rate": 7.434944237918216e-05, | |
| "loss": 0.7377, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 73.00026765799257, | |
| "eval_accuracy": 0.8146328689405671, | |
| "eval_loss": 1.5363017320632935, | |
| "eval_macro_f1": 0.5563859432409435, | |
| "eval_runtime": 47.0425, | |
| "eval_samples_per_second": 14.285, | |
| "eval_steps_per_second": 3.571, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 74.00013382899628, | |
| "grad_norm": 4.5283284187316895, | |
| "learning_rate": 7.509293680297399e-05, | |
| "loss": 0.7373, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 74.00050557620818, | |
| "grad_norm": 4.018579959869385, | |
| "learning_rate": 7.58364312267658e-05, | |
| "loss": 0.7194, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 74.00050557620818, | |
| "eval_accuracy": 0.8134706343704267, | |
| "eval_loss": 1.5270371437072754, | |
| "eval_macro_f1": 0.5648332270298382, | |
| "eval_runtime": 47.1218, | |
| "eval_samples_per_second": 14.261, | |
| "eval_steps_per_second": 3.565, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 75.0003717472119, | |
| "grad_norm": 4.909033298492432, | |
| "learning_rate": 7.657992565055763e-05, | |
| "loss": 0.7112, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 76.00023791821562, | |
| "grad_norm": 6.122696876525879, | |
| "learning_rate": 7.732342007434946e-05, | |
| "loss": 0.706, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 76.00023791821562, | |
| "eval_accuracy": 0.8068405032797368, | |
| "eval_loss": 1.5804245471954346, | |
| "eval_macro_f1": 0.5562806947678465, | |
| "eval_runtime": 47.508, | |
| "eval_samples_per_second": 14.145, | |
| "eval_steps_per_second": 3.536, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 77.00010408921933, | |
| "grad_norm": 3.1820068359375, | |
| "learning_rate": 7.806691449814127e-05, | |
| "loss": 0.6735, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 77.00047583643122, | |
| "grad_norm": 3.543308734893799, | |
| "learning_rate": 7.881040892193308e-05, | |
| "loss": 0.6827, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 77.00047583643122, | |
| "eval_accuracy": 0.8132421110520636, | |
| "eval_loss": 1.5398247241973877, | |
| "eval_macro_f1": 0.5569486652835459, | |
| "eval_runtime": 46.848, | |
| "eval_samples_per_second": 14.344, | |
| "eval_steps_per_second": 3.586, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 78.00034200743494, | |
| "grad_norm": 4.693170547485352, | |
| "learning_rate": 7.955390334572491e-05, | |
| "loss": 0.6835, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 79.00020817843865, | |
| "grad_norm": 2.5306339263916016, | |
| "learning_rate": 8.029739776951674e-05, | |
| "loss": 0.6852, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 79.00020817843865, | |
| "eval_accuracy": 0.8227611279450846, | |
| "eval_loss": 1.4755533933639526, | |
| "eval_macro_f1": 0.5733192275425867, | |
| "eval_runtime": 47.7076, | |
| "eval_samples_per_second": 14.086, | |
| "eval_steps_per_second": 3.521, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 80.00007434944239, | |
| "grad_norm": 2.764157295227051, | |
| "learning_rate": 8.104089219330855e-05, | |
| "loss": 0.6602, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 80.00044609665427, | |
| "grad_norm": 3.0805418491363525, | |
| "learning_rate": 8.178438661710037e-05, | |
| "loss": 0.6333, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 80.00044609665427, | |
| "eval_accuracy": 0.8215874176654338, | |
| "eval_loss": 1.4958947896957397, | |
| "eval_macro_f1": 0.5765976404305969, | |
| "eval_runtime": 47.5831, | |
| "eval_samples_per_second": 14.123, | |
| "eval_steps_per_second": 3.531, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 81.00031226765799, | |
| "grad_norm": 3.5253844261169434, | |
| "learning_rate": 8.25278810408922e-05, | |
| "loss": 0.6417, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 82.0001784386617, | |
| "grad_norm": 3.3636598587036133, | |
| "learning_rate": 8.327137546468403e-05, | |
| "loss": 0.6288, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 82.0001784386617, | |
| "eval_accuracy": 0.8281630899930705, | |
| "eval_loss": 1.4461908340454102, | |
| "eval_macro_f1": 0.5842528545422506, | |
| "eval_runtime": 47.5162, | |
| "eval_samples_per_second": 14.143, | |
| "eval_steps_per_second": 3.536, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 83.00004460966542, | |
| "grad_norm": 3.808940887451172, | |
| "learning_rate": 8.401486988847584e-05, | |
| "loss": 0.6302, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 83.00041635687732, | |
| "grad_norm": 2.76824951171875, | |
| "learning_rate": 8.475836431226765e-05, | |
| "loss": 0.6165, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 83.00041635687732, | |
| "eval_accuracy": 0.8236607576629754, | |
| "eval_loss": 1.5466784238815308, | |
| "eval_macro_f1": 0.5788110277050088, | |
| "eval_runtime": 46.9322, | |
| "eval_samples_per_second": 14.319, | |
| "eval_steps_per_second": 3.58, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 84.00028252788104, | |
| "grad_norm": 3.550778388977051, | |
| "learning_rate": 8.550185873605948e-05, | |
| "loss": 0.6126, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 85.00014869888476, | |
| "grad_norm": 4.159547805786133, | |
| "learning_rate": 8.624535315985131e-05, | |
| "loss": 0.6153, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 85.00014869888476, | |
| "eval_accuracy": 0.8248911419166896, | |
| "eval_loss": 1.5206892490386963, | |
| "eval_macro_f1": 0.5832331531893454, | |
| "eval_runtime": 47.5885, | |
| "eval_samples_per_second": 14.121, | |
| "eval_steps_per_second": 3.53, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 86.00001486988847, | |
| "grad_norm": 3.1821999549865723, | |
| "learning_rate": 8.698884758364312e-05, | |
| "loss": 0.5915, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 86.00038661710038, | |
| "grad_norm": 3.598517894744873, | |
| "learning_rate": 8.773234200743495e-05, | |
| "loss": 0.593, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 86.00038661710038, | |
| "eval_accuracy": 0.8303127795000983, | |
| "eval_loss": 1.5167983770370483, | |
| "eval_macro_f1": 0.5917533935658699, | |
| "eval_runtime": 47.842, | |
| "eval_samples_per_second": 14.046, | |
| "eval_steps_per_second": 3.512, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 87.00025278810409, | |
| "grad_norm": 2.605105400085449, | |
| "learning_rate": 8.847583643122677e-05, | |
| "loss": 0.5804, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 88.00011895910781, | |
| "grad_norm": 7.943371295928955, | |
| "learning_rate": 8.92193308550186e-05, | |
| "loss": 0.5669, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 88.00011895910781, | |
| "eval_accuracy": 0.8296147233158495, | |
| "eval_loss": 1.494034767150879, | |
| "eval_macro_f1": 0.58373317591399, | |
| "eval_runtime": 47.863, | |
| "eval_samples_per_second": 14.04, | |
| "eval_steps_per_second": 3.51, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 88.00049070631971, | |
| "grad_norm": 2.271465301513672, | |
| "learning_rate": 8.996282527881041e-05, | |
| "loss": 0.5878, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 89.00035687732343, | |
| "grad_norm": 4.07094669342041, | |
| "learning_rate": 9.070631970260224e-05, | |
| "loss": 0.5583, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 89.00035687732343, | |
| "eval_accuracy": 0.8304963970843622, | |
| "eval_loss": 1.4894771575927734, | |
| "eval_macro_f1": 0.596375277813928, | |
| "eval_runtime": 47.7329, | |
| "eval_samples_per_second": 14.078, | |
| "eval_steps_per_second": 3.52, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 90.00022304832714, | |
| "grad_norm": 3.257551670074463, | |
| "learning_rate": 9.144981412639405e-05, | |
| "loss": 0.5567, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 91.00008921933086, | |
| "grad_norm": 2.342348575592041, | |
| "learning_rate": 9.219330855018588e-05, | |
| "loss": 0.5479, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 91.00008921933086, | |
| "eval_accuracy": 0.8359645780191465, | |
| "eval_loss": 1.5507783889770508, | |
| "eval_macro_f1": 0.5961247601926936, | |
| "eval_runtime": 47.1119, | |
| "eval_samples_per_second": 14.264, | |
| "eval_steps_per_second": 3.566, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 91.00046096654275, | |
| "grad_norm": 4.675436496734619, | |
| "learning_rate": 9.29368029739777e-05, | |
| "loss": 0.5373, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 92.00032713754646, | |
| "grad_norm": 2.5292415618896484, | |
| "learning_rate": 9.368029739776952e-05, | |
| "loss": 0.5385, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 92.00032713754646, | |
| "eval_accuracy": 0.8331121322684258, | |
| "eval_loss": 1.4987480640411377, | |
| "eval_macro_f1": 0.6056008732625726, | |
| "eval_runtime": 47.6126, | |
| "eval_samples_per_second": 14.114, | |
| "eval_steps_per_second": 3.528, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 93.00019330855018, | |
| "grad_norm": 4.040349006652832, | |
| "learning_rate": 9.442379182156135e-05, | |
| "loss": 0.5444, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 94.0000594795539, | |
| "grad_norm": 4.418771266937256, | |
| "learning_rate": 9.516728624535316e-05, | |
| "loss": 0.5191, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 94.0000594795539, | |
| "eval_accuracy": 0.8352555949405721, | |
| "eval_loss": 1.5468343496322632, | |
| "eval_macro_f1": 0.6039138175393013, | |
| "eval_runtime": 48.0596, | |
| "eval_samples_per_second": 13.983, | |
| "eval_steps_per_second": 3.496, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 94.0004312267658, | |
| "grad_norm": 3.186802864074707, | |
| "learning_rate": 9.591078066914498e-05, | |
| "loss": 0.5344, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 95.00029739776951, | |
| "grad_norm": 3.07525372505188, | |
| "learning_rate": 9.66542750929368e-05, | |
| "loss": 0.5123, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 95.00029739776951, | |
| "eval_accuracy": 0.8292436626419579, | |
| "eval_loss": 1.5689940452575684, | |
| "eval_macro_f1": 0.6086947888526043, | |
| "eval_runtime": 47.3771, | |
| "eval_samples_per_second": 14.184, | |
| "eval_steps_per_second": 3.546, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 96.00016356877323, | |
| "grad_norm": 4.3406243324279785, | |
| "learning_rate": 9.739776951672863e-05, | |
| "loss": 0.5028, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 97.00002973977695, | |
| "grad_norm": 4.482454299926758, | |
| "learning_rate": 9.814126394052046e-05, | |
| "loss": 0.531, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 97.00002973977695, | |
| "eval_accuracy": 0.8345079898078186, | |
| "eval_loss": 1.422137975692749, | |
| "eval_macro_f1": 0.6109642448055717, | |
| "eval_runtime": 47.3199, | |
| "eval_samples_per_second": 14.201, | |
| "eval_steps_per_second": 3.55, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 97.00040148698885, | |
| "grad_norm": 2.9254748821258545, | |
| "learning_rate": 9.888475836431226e-05, | |
| "loss": 0.4891, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 98.00026765799257, | |
| "grad_norm": 2.5848145484924316, | |
| "learning_rate": 9.962825278810409e-05, | |
| "loss": 0.5194, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 98.00026765799257, | |
| "eval_accuracy": 0.8365044824717158, | |
| "eval_loss": 1.4362893104553223, | |
| "eval_macro_f1": 0.6104741259771145, | |
| "eval_runtime": 47.8303, | |
| "eval_samples_per_second": 14.05, | |
| "eval_steps_per_second": 3.512, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 99.00013382899628, | |
| "grad_norm": 2.0179648399353027, | |
| "learning_rate": 9.998043435726863e-05, | |
| "loss": 0.4938, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 99.00050557620818, | |
| "grad_norm": 3.029017210006714, | |
| "learning_rate": 9.994130307180592e-05, | |
| "loss": 0.4885, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 99.00050557620818, | |
| "eval_accuracy": 0.8297226753963072, | |
| "eval_loss": 1.5579314231872559, | |
| "eval_macro_f1": 0.6109209923521035, | |
| "eval_runtime": 47.6341, | |
| "eval_samples_per_second": 14.108, | |
| "eval_steps_per_second": 3.527, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 100.0003717472119, | |
| "grad_norm": 10.595804214477539, | |
| "learning_rate": 9.990217178634319e-05, | |
| "loss": 0.4856, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 101.00023791821562, | |
| "grad_norm": 3.8170125484466553, | |
| "learning_rate": 9.986304050088046e-05, | |
| "loss": 0.4672, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 101.00023791821562, | |
| "eval_accuracy": 0.8405256629197011, | |
| "eval_loss": 1.5172147750854492, | |
| "eval_macro_f1": 0.6117042873240769, | |
| "eval_runtime": 47.3002, | |
| "eval_samples_per_second": 14.207, | |
| "eval_steps_per_second": 3.552, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 102.00010408921933, | |
| "grad_norm": 2.297795295715332, | |
| "learning_rate": 9.982390921541773e-05, | |
| "loss": 0.4861, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 102.00047583643122, | |
| "grad_norm": 2.0339696407318115, | |
| "learning_rate": 9.978477792995501e-05, | |
| "loss": 0.4791, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 102.00047583643122, | |
| "eval_accuracy": 0.8386383728766971, | |
| "eval_loss": 1.5465196371078491, | |
| "eval_macro_f1": 0.6121669524563613, | |
| "eval_runtime": 47.7339, | |
| "eval_samples_per_second": 14.078, | |
| "eval_steps_per_second": 3.52, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 103.00034200743494, | |
| "grad_norm": 1.891178846359253, | |
| "learning_rate": 9.974564664449228e-05, | |
| "loss": 0.459, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 104.00020817843865, | |
| "grad_norm": 2.3269009590148926, | |
| "learning_rate": 9.970651535902955e-05, | |
| "loss": 0.4622, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 104.00020817843865, | |
| "eval_accuracy": 0.8428495839253495, | |
| "eval_loss": 1.5032823085784912, | |
| "eval_macro_f1": 0.6198375498118172, | |
| "eval_runtime": 47.9251, | |
| "eval_samples_per_second": 14.022, | |
| "eval_steps_per_second": 3.505, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 105.00007434944239, | |
| "grad_norm": 2.6395773887634277, | |
| "learning_rate": 9.966738407356682e-05, | |
| "loss": 0.4773, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 105.00044609665427, | |
| "grad_norm": 2.4456825256347656, | |
| "learning_rate": 9.962825278810409e-05, | |
| "loss": 0.4392, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 105.00044609665427, | |
| "eval_accuracy": 0.8425608684033172, | |
| "eval_loss": 1.506137490272522, | |
| "eval_macro_f1": 0.620540991552123, | |
| "eval_runtime": 48.1909, | |
| "eval_samples_per_second": 13.945, | |
| "eval_steps_per_second": 3.486, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 106.00031226765799, | |
| "grad_norm": 2.2125296592712402, | |
| "learning_rate": 9.958912150264137e-05, | |
| "loss": 0.4427, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 107.0001784386617, | |
| "grad_norm": 2.6064858436584473, | |
| "learning_rate": 9.954999021717863e-05, | |
| "loss": 0.4486, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 107.0001784386617, | |
| "eval_accuracy": 0.8388894429121909, | |
| "eval_loss": 1.5635102987289429, | |
| "eval_macro_f1": 0.6163644154221748, | |
| "eval_runtime": 48.2598, | |
| "eval_samples_per_second": 13.925, | |
| "eval_steps_per_second": 3.481, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 108.00004460966542, | |
| "grad_norm": 1.7639355659484863, | |
| "learning_rate": 9.951085893171591e-05, | |
| "loss": 0.434, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 108.00041635687732, | |
| "grad_norm": 1.9860466718673706, | |
| "learning_rate": 9.947172764625318e-05, | |
| "loss": 0.4339, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 108.00041635687732, | |
| "eval_accuracy": 0.8519226590782587, | |
| "eval_loss": 1.425977110862732, | |
| "eval_macro_f1": 0.6347526514291026, | |
| "eval_runtime": 48.0295, | |
| "eval_samples_per_second": 13.991, | |
| "eval_steps_per_second": 3.498, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 109.00028252788104, | |
| "grad_norm": 7.6796441078186035, | |
| "learning_rate": 9.943259636079045e-05, | |
| "loss": 0.4346, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 110.00014869888476, | |
| "grad_norm": 2.4183762073516846, | |
| "learning_rate": 9.939346507532772e-05, | |
| "loss": 0.4256, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 110.00014869888476, | |
| "eval_accuracy": 0.844811035558245, | |
| "eval_loss": 1.6024999618530273, | |
| "eval_macro_f1": 0.6297670917886972, | |
| "eval_runtime": 47.985, | |
| "eval_samples_per_second": 14.004, | |
| "eval_steps_per_second": 3.501, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 111.00001486988847, | |
| "grad_norm": 3.8100974559783936, | |
| "learning_rate": 9.935433378986501e-05, | |
| "loss": 0.4296, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 111.00038661710038, | |
| "grad_norm": 1.486348032951355, | |
| "learning_rate": 9.931520250440226e-05, | |
| "loss": 0.4162, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 111.00038661710038, | |
| "eval_accuracy": 0.8440565173857372, | |
| "eval_loss": 1.533053994178772, | |
| "eval_macro_f1": 0.6327355016288086, | |
| "eval_runtime": 48.0302, | |
| "eval_samples_per_second": 13.991, | |
| "eval_steps_per_second": 3.498, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 112.00025278810409, | |
| "grad_norm": 3.1359190940856934, | |
| "learning_rate": 9.927607121893955e-05, | |
| "loss": 0.4187, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 113.00011895910781, | |
| "grad_norm": 1.8909200429916382, | |
| "learning_rate": 9.923693993347682e-05, | |
| "loss": 0.413, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 113.00011895910781, | |
| "eval_accuracy": 0.8493778775360123, | |
| "eval_loss": 1.4858617782592773, | |
| "eval_macro_f1": 0.6237896813023426, | |
| "eval_runtime": 48.1079, | |
| "eval_samples_per_second": 13.969, | |
| "eval_steps_per_second": 3.492, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 113.00049070631971, | |
| "grad_norm": 2.108914852142334, | |
| "learning_rate": 9.919780864801409e-05, | |
| "loss": 0.4126, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 114.00035687732343, | |
| "grad_norm": 2.574831247329712, | |
| "learning_rate": 9.915867736255137e-05, | |
| "loss": 0.4146, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 114.00035687732343, | |
| "eval_accuracy": 0.8482534627998802, | |
| "eval_loss": 1.578758955001831, | |
| "eval_macro_f1": 0.632708327751029, | |
| "eval_runtime": 47.495, | |
| "eval_samples_per_second": 14.149, | |
| "eval_steps_per_second": 3.537, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 115.00022304832714, | |
| "grad_norm": 2.461167335510254, | |
| "learning_rate": 9.911954607708863e-05, | |
| "loss": 0.4003, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 116.00008921933086, | |
| "grad_norm": 2.769954204559326, | |
| "learning_rate": 9.908041479162591e-05, | |
| "loss": 0.4176, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 116.00008921933086, | |
| "eval_accuracy": 0.8472196779845549, | |
| "eval_loss": 1.5794305801391602, | |
| "eval_macro_f1": 0.6313642185620171, | |
| "eval_runtime": 47.7317, | |
| "eval_samples_per_second": 14.079, | |
| "eval_steps_per_second": 3.52, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 116.00046096654275, | |
| "grad_norm": 1.5411442518234253, | |
| "learning_rate": 9.904128350616318e-05, | |
| "loss": 0.3905, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 117.00032713754646, | |
| "grad_norm": 2.8972113132476807, | |
| "learning_rate": 9.900215222070045e-05, | |
| "loss": 0.4125, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 117.00032713754646, | |
| "eval_accuracy": 0.847543988494726, | |
| "eval_loss": 1.4954912662506104, | |
| "eval_macro_f1": 0.6424840275505329, | |
| "eval_runtime": 47.4476, | |
| "eval_samples_per_second": 14.163, | |
| "eval_steps_per_second": 3.541, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 118.00019330855018, | |
| "grad_norm": 2.530663013458252, | |
| "learning_rate": 9.896302093523772e-05, | |
| "loss": 0.3915, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 119.0000594795539, | |
| "grad_norm": 2.1395466327667236, | |
| "learning_rate": 9.8923889649775e-05, | |
| "loss": 0.389, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 119.0000594795539, | |
| "eval_accuracy": 0.8531609291900277, | |
| "eval_loss": 1.4969804286956787, | |
| "eval_macro_f1": 0.640615164107896, | |
| "eval_runtime": 47.2643, | |
| "eval_samples_per_second": 14.218, | |
| "eval_steps_per_second": 3.554, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 119.0004312267658, | |
| "grad_norm": 8.651265144348145, | |
| "learning_rate": 9.888475836431226e-05, | |
| "loss": 0.391, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 120.00029739776951, | |
| "grad_norm": 2.033094882965088, | |
| "learning_rate": 9.884562707884955e-05, | |
| "loss": 0.3935, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 120.00029739776951, | |
| "eval_accuracy": 0.8519182980473882, | |
| "eval_loss": 1.4476945400238037, | |
| "eval_macro_f1": 0.6439031700059592, | |
| "eval_runtime": 47.5822, | |
| "eval_samples_per_second": 14.123, | |
| "eval_steps_per_second": 3.531, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 121.00016356877323, | |
| "grad_norm": 1.3807177543640137, | |
| "learning_rate": 9.880649579338682e-05, | |
| "loss": 0.3868, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 122.00002973977695, | |
| "grad_norm": 1.9018504619598389, | |
| "learning_rate": 9.876736450792409e-05, | |
| "loss": 0.3701, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 122.00002973977695, | |
| "eval_accuracy": 0.8542941516114884, | |
| "eval_loss": 1.5301586389541626, | |
| "eval_macro_f1": 0.6438060495518036, | |
| "eval_runtime": 47.3252, | |
| "eval_samples_per_second": 14.2, | |
| "eval_steps_per_second": 3.55, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 122.00040148698885, | |
| "grad_norm": 2.0158724784851074, | |
| "learning_rate": 9.872823322246137e-05, | |
| "loss": 0.3728, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 123.00026765799257, | |
| "grad_norm": 1.53249990940094, | |
| "learning_rate": 9.868910193699864e-05, | |
| "loss": 0.3757, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 123.00026765799257, | |
| "eval_accuracy": 0.8517502815811042, | |
| "eval_loss": 1.489638328552246, | |
| "eval_macro_f1": 0.6472024615679871, | |
| "eval_runtime": 47.12, | |
| "eval_samples_per_second": 14.261, | |
| "eval_steps_per_second": 3.565, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 124.00013382899628, | |
| "grad_norm": 1.1672303676605225, | |
| "learning_rate": 9.864997065153591e-05, | |
| "loss": 0.37, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 124.00050557620818, | |
| "grad_norm": 1.6479486227035522, | |
| "learning_rate": 9.861083936607318e-05, | |
| "loss": 0.379, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 124.00050557620818, | |
| "eval_accuracy": 0.8549860939703802, | |
| "eval_loss": 1.4589107036590576, | |
| "eval_macro_f1": 0.6522917447958335, | |
| "eval_runtime": 47.0089, | |
| "eval_samples_per_second": 14.295, | |
| "eval_steps_per_second": 3.574, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 125.0003717472119, | |
| "grad_norm": 2.498363971710205, | |
| "learning_rate": 9.857170808061045e-05, | |
| "loss": 0.3714, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 126.00023791821562, | |
| "grad_norm": 1.800485610961914, | |
| "learning_rate": 9.853257679514772e-05, | |
| "loss": 0.3589, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 126.00023791821562, | |
| "eval_accuracy": 0.8579483635541026, | |
| "eval_loss": 1.5368930101394653, | |
| "eval_macro_f1": 0.6484518382038933, | |
| "eval_runtime": 47.2042, | |
| "eval_samples_per_second": 14.236, | |
| "eval_steps_per_second": 3.559, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 127.00010408921933, | |
| "grad_norm": 2.102792739868164, | |
| "learning_rate": 9.8493445509685e-05, | |
| "loss": 0.3632, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 127.00047583643122, | |
| "grad_norm": 1.4719001054763794, | |
| "learning_rate": 9.845431422422226e-05, | |
| "loss": 0.3608, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 127.00047583643122, | |
| "eval_accuracy": 0.8541380281972378, | |
| "eval_loss": 1.4789085388183594, | |
| "eval_macro_f1": 0.6526740777626168, | |
| "eval_runtime": 47.3365, | |
| "eval_samples_per_second": 14.196, | |
| "eval_steps_per_second": 3.549, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 128.00034200743494, | |
| "grad_norm": 1.1043628454208374, | |
| "learning_rate": 9.841518293875954e-05, | |
| "loss": 0.3552, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 129.00020817843867, | |
| "grad_norm": 1.163142442703247, | |
| "learning_rate": 9.837605165329681e-05, | |
| "loss": 0.3575, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 129.00020817843867, | |
| "eval_accuracy": 0.8578308939153396, | |
| "eval_loss": 1.567258358001709, | |
| "eval_macro_f1": 0.6559202691799706, | |
| "eval_runtime": 47.2359, | |
| "eval_samples_per_second": 14.226, | |
| "eval_steps_per_second": 3.557, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 130.00007434944237, | |
| "grad_norm": 2.237764596939087, | |
| "learning_rate": 9.833692036783408e-05, | |
| "loss": 0.3559, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 130.0004460966543, | |
| "grad_norm": 2.362916946411133, | |
| "learning_rate": 9.829778908237135e-05, | |
| "loss": 0.353, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 130.0004460966543, | |
| "eval_accuracy": 0.8561993702494404, | |
| "eval_loss": 1.4892395734786987, | |
| "eval_macro_f1": 0.6546700309203295, | |
| "eval_runtime": 47.1656, | |
| "eval_samples_per_second": 14.248, | |
| "eval_steps_per_second": 3.562, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 131.000312267658, | |
| "grad_norm": 1.1094646453857422, | |
| "learning_rate": 9.825865779690864e-05, | |
| "loss": 0.3503, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 132.00017843866172, | |
| "grad_norm": 0.9935618042945862, | |
| "learning_rate": 9.821952651144591e-05, | |
| "loss": 0.345, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 132.00017843866172, | |
| "eval_accuracy": 0.8525096438318296, | |
| "eval_loss": 1.518179178237915, | |
| "eval_macro_f1": 0.6513330111299382, | |
| "eval_runtime": 50.1526, | |
| "eval_samples_per_second": 13.399, | |
| "eval_steps_per_second": 3.35, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 133.00004460966542, | |
| "grad_norm": 1.5325652360916138, | |
| "learning_rate": 9.818039522598318e-05, | |
| "loss": 0.3518, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 133.0004163568773, | |
| "grad_norm": 1.7667288780212402, | |
| "learning_rate": 9.814126394052046e-05, | |
| "loss": 0.3454, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 133.0004163568773, | |
| "eval_accuracy": 0.856074093035065, | |
| "eval_loss": 1.4917337894439697, | |
| "eval_macro_f1": 0.6523897388323083, | |
| "eval_runtime": 50.7576, | |
| "eval_samples_per_second": 13.239, | |
| "eval_steps_per_second": 3.31, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 134.00028252788104, | |
| "grad_norm": 0.9201573729515076, | |
| "learning_rate": 9.810213265505772e-05, | |
| "loss": 0.3505, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 135.00014869888477, | |
| "grad_norm": 1.5801942348480225, | |
| "learning_rate": 9.8063001369595e-05, | |
| "loss": 0.3398, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 135.00014869888477, | |
| "eval_accuracy": 0.8579610019182543, | |
| "eval_loss": 1.500423789024353, | |
| "eval_macro_f1": 0.6573863845637962, | |
| "eval_runtime": 50.47, | |
| "eval_samples_per_second": 13.315, | |
| "eval_steps_per_second": 3.329, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 136.00001486988847, | |
| "grad_norm": 1.5136662721633911, | |
| "learning_rate": 9.802387008413226e-05, | |
| "loss": 0.3379, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 136.00038661710036, | |
| "grad_norm": 2.073296070098877, | |
| "learning_rate": 9.798473879866954e-05, | |
| "loss": 0.3424, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 136.00038661710036, | |
| "eval_accuracy": 0.8589703018470214, | |
| "eval_loss": 1.500227928161621, | |
| "eval_macro_f1": 0.6582180779919148, | |
| "eval_runtime": 50.2017, | |
| "eval_samples_per_second": 13.386, | |
| "eval_steps_per_second": 3.346, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 137.0002527881041, | |
| "grad_norm": 2.330988883972168, | |
| "learning_rate": 9.794560751320681e-05, | |
| "loss": 0.3388, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 138.0001189591078, | |
| "grad_norm": 1.1455012559890747, | |
| "learning_rate": 9.790647622774408e-05, | |
| "loss": 0.3444, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 138.0001189591078, | |
| "eval_accuracy": 0.8580753729006343, | |
| "eval_loss": 1.5448076725006104, | |
| "eval_macro_f1": 0.659930109636158, | |
| "eval_runtime": 49.8683, | |
| "eval_samples_per_second": 13.475, | |
| "eval_steps_per_second": 3.369, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 138.0004907063197, | |
| "grad_norm": 2.7329256534576416, | |
| "learning_rate": 9.786734494228135e-05, | |
| "loss": 0.3312, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 139.0003568773234, | |
| "grad_norm": 1.2961472272872925, | |
| "learning_rate": 9.782821365681864e-05, | |
| "loss": 0.3344, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 139.0003568773234, | |
| "eval_accuracy": 0.8541455048920026, | |
| "eval_loss": 1.523103952407837, | |
| "eval_macro_f1": 0.6645437069408668, | |
| "eval_runtime": 48.9673, | |
| "eval_samples_per_second": 13.723, | |
| "eval_steps_per_second": 3.431, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 140.00022304832714, | |
| "grad_norm": 1.2091388702392578, | |
| "learning_rate": 9.77890823713559e-05, | |
| "loss": 0.3396, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 141.00008921933085, | |
| "grad_norm": 1.0885772705078125, | |
| "learning_rate": 9.774995108589318e-05, | |
| "loss": 0.3341, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 141.00008921933085, | |
| "eval_accuracy": 0.859411708208793, | |
| "eval_loss": 1.5716527700424194, | |
| "eval_macro_f1": 0.6649711044669897, | |
| "eval_runtime": 48.314, | |
| "eval_samples_per_second": 13.909, | |
| "eval_steps_per_second": 3.477, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 141.00046096654276, | |
| "grad_norm": 0.9991358518600464, | |
| "learning_rate": 9.771081980043045e-05, | |
| "loss": 0.3292, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 142.00032713754646, | |
| "grad_norm": 0.9725756049156189, | |
| "learning_rate": 9.767168851496772e-05, | |
| "loss": 0.3259, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 142.00032713754646, | |
| "eval_accuracy": 0.8582671230558392, | |
| "eval_loss": 1.5597984790802002, | |
| "eval_macro_f1": 0.66374123938933, | |
| "eval_runtime": 48.6249, | |
| "eval_samples_per_second": 13.82, | |
| "eval_steps_per_second": 3.455, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 143.0001933085502, | |
| "grad_norm": 1.9716778993606567, | |
| "learning_rate": 9.7632557229505e-05, | |
| "loss": 0.3214, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 144.0000594795539, | |
| "grad_norm": 1.3841463327407837, | |
| "learning_rate": 9.759342594404227e-05, | |
| "loss": 0.3243, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 144.0000594795539, | |
| "eval_accuracy": 0.8594564950163417, | |
| "eval_loss": 1.600425362586975, | |
| "eval_macro_f1": 0.6602272364365409, | |
| "eval_runtime": 49.0882, | |
| "eval_samples_per_second": 13.69, | |
| "eval_steps_per_second": 3.422, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 144.0004312267658, | |
| "grad_norm": 1.2652596235275269, | |
| "learning_rate": 9.755429465857954e-05, | |
| "loss": 0.3195, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 145.00029739776951, | |
| "grad_norm": 0.9097074866294861, | |
| "learning_rate": 9.751516337311681e-05, | |
| "loss": 0.3236, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 145.00029739776951, | |
| "eval_accuracy": 0.8547185372038479, | |
| "eval_loss": 1.5480947494506836, | |
| "eval_macro_f1": 0.6692649675278269, | |
| "eval_runtime": 48.7778, | |
| "eval_samples_per_second": 13.777, | |
| "eval_steps_per_second": 3.444, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 146.00016356877325, | |
| "grad_norm": 1.3369333744049072, | |
| "learning_rate": 9.747603208765408e-05, | |
| "loss": 0.3203, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 147.00002973977695, | |
| "grad_norm": 0.7002597451210022, | |
| "learning_rate": 9.743690080219135e-05, | |
| "loss": 0.3212, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 147.00002973977695, | |
| "eval_accuracy": 0.8636582594164792, | |
| "eval_loss": 1.4696401357650757, | |
| "eval_macro_f1": 0.669836854595993, | |
| "eval_runtime": 49.8793, | |
| "eval_samples_per_second": 13.473, | |
| "eval_steps_per_second": 3.368, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 147.00040148698884, | |
| "grad_norm": 0.966150164604187, | |
| "learning_rate": 9.739776951672863e-05, | |
| "loss": 0.314, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 148.00026765799257, | |
| "grad_norm": 0.9162842631340027, | |
| "learning_rate": 9.735863823126589e-05, | |
| "loss": 0.3137, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 148.00026765799257, | |
| "eval_accuracy": 0.8518328815305616, | |
| "eval_loss": 1.57254958152771, | |
| "eval_macro_f1": 0.6745871026457042, | |
| "eval_runtime": 48.5751, | |
| "eval_samples_per_second": 13.834, | |
| "eval_steps_per_second": 3.459, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 149.00013382899627, | |
| "grad_norm": 1.1828657388687134, | |
| "learning_rate": 9.731950694580317e-05, | |
| "loss": 0.3192, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 149.00050557620818, | |
| "grad_norm": 1.726320505142212, | |
| "learning_rate": 9.728037566034044e-05, | |
| "loss": 0.3205, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 149.00050557620818, | |
| "eval_accuracy": 0.8620538510229856, | |
| "eval_loss": 1.4266021251678467, | |
| "eval_macro_f1": 0.6685838494494656, | |
| "eval_runtime": 48.1524, | |
| "eval_samples_per_second": 13.956, | |
| "eval_steps_per_second": 3.489, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 150.0003717472119, | |
| "grad_norm": 1.412037968635559, | |
| "learning_rate": 9.724124437487771e-05, | |
| "loss": 0.3112, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 151.00023791821562, | |
| "grad_norm": 1.0832695960998535, | |
| "learning_rate": 9.7202113089415e-05, | |
| "loss": 0.3102, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 151.00023791821562, | |
| "eval_accuracy": 0.8641672940325766, | |
| "eval_loss": 1.574267029762268, | |
| "eval_macro_f1": 0.6709263512273164, | |
| "eval_runtime": 48.5859, | |
| "eval_samples_per_second": 13.831, | |
| "eval_steps_per_second": 3.458, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 152.00010408921932, | |
| "grad_norm": 0.9826604127883911, | |
| "learning_rate": 9.716298180395227e-05, | |
| "loss": 0.3152, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 152.00047583643124, | |
| "grad_norm": 1.4301373958587646, | |
| "learning_rate": 9.712385051848954e-05, | |
| "loss": 0.3129, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 152.00047583643124, | |
| "eval_accuracy": 0.8644154699933324, | |
| "eval_loss": 1.5302149057388306, | |
| "eval_macro_f1": 0.6709705649680309, | |
| "eval_runtime": 48.0303, | |
| "eval_samples_per_second": 13.991, | |
| "eval_steps_per_second": 3.498, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 153.00034200743494, | |
| "grad_norm": 1.2386184930801392, | |
| "learning_rate": 9.708471923302681e-05, | |
| "loss": 0.3126, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 154.00020817843867, | |
| "grad_norm": 1.0621957778930664, | |
| "learning_rate": 9.704558794756409e-05, | |
| "loss": 0.3094, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 154.00020817843867, | |
| "eval_accuracy": 0.8608746636189569, | |
| "eval_loss": 1.5115866661071777, | |
| "eval_macro_f1": 0.6730138894100086, | |
| "eval_runtime": 47.7743, | |
| "eval_samples_per_second": 14.066, | |
| "eval_steps_per_second": 3.517, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 155.00007434944237, | |
| "grad_norm": 1.3560012578964233, | |
| "learning_rate": 9.700645666210135e-05, | |
| "loss": 0.2996, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 155.0004460966543, | |
| "grad_norm": 0.7811614274978638, | |
| "learning_rate": 9.696732537663863e-05, | |
| "loss": 0.3058, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 155.0004460966543, | |
| "eval_accuracy": 0.860774923910523, | |
| "eval_loss": 1.5835298299789429, | |
| "eval_macro_f1": 0.6707021458940838, | |
| "eval_runtime": 47.8347, | |
| "eval_samples_per_second": 14.048, | |
| "eval_steps_per_second": 3.512, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 156.000312267658, | |
| "grad_norm": 1.2949461936950684, | |
| "learning_rate": 9.692819409117589e-05, | |
| "loss": 0.3079, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 157.00017843866172, | |
| "grad_norm": 0.860791027545929, | |
| "learning_rate": 9.688906280571317e-05, | |
| "loss": 0.3063, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 157.00017843866172, | |
| "eval_accuracy": 0.8623498068108009, | |
| "eval_loss": 1.6110292673110962, | |
| "eval_macro_f1": 0.6705726826881232, | |
| "eval_runtime": 48.9902, | |
| "eval_samples_per_second": 13.717, | |
| "eval_steps_per_second": 3.429, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 158.00004460966542, | |
| "grad_norm": 2.020859956741333, | |
| "learning_rate": 9.684993152025044e-05, | |
| "loss": 0.3018, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 158.0004163568773, | |
| "grad_norm": 1.3179705142974854, | |
| "learning_rate": 9.681080023478771e-05, | |
| "loss": 0.2969, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 158.0004163568773, | |
| "eval_accuracy": 0.8589260328139017, | |
| "eval_loss": 1.658690333366394, | |
| "eval_macro_f1": 0.6674403813053171, | |
| "eval_runtime": 47.806, | |
| "eval_samples_per_second": 14.057, | |
| "eval_steps_per_second": 3.514, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 159.00028252788104, | |
| "grad_norm": 1.1617735624313354, | |
| "learning_rate": 9.6771668949325e-05, | |
| "loss": 0.3034, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 160.00014869888477, | |
| "grad_norm": 0.7395443320274353, | |
| "learning_rate": 9.673253766386227e-05, | |
| "loss": 0.3022, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 160.00014869888477, | |
| "eval_accuracy": 0.8604493924135389, | |
| "eval_loss": 1.7630372047424316, | |
| "eval_macro_f1": 0.6697817878000056, | |
| "eval_runtime": 48.5778, | |
| "eval_samples_per_second": 13.833, | |
| "eval_steps_per_second": 3.458, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 161.00001486988847, | |
| "grad_norm": 1.5594608783721924, | |
| "learning_rate": 9.669340637839954e-05, | |
| "loss": 0.3029, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 161.00038661710036, | |
| "grad_norm": 1.3082185983657837, | |
| "learning_rate": 9.66542750929368e-05, | |
| "loss": 0.2979, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 161.00038661710036, | |
| "eval_accuracy": 0.8608462416731367, | |
| "eval_loss": 1.5527905225753784, | |
| "eval_macro_f1": 0.6727040248898062, | |
| "eval_runtime": 47.7197, | |
| "eval_samples_per_second": 14.082, | |
| "eval_steps_per_second": 3.521, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 162.0002527881041, | |
| "grad_norm": 0.7811280488967896, | |
| "learning_rate": 9.661514380747409e-05, | |
| "loss": 0.3009, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 163.0001189591078, | |
| "grad_norm": 1.0631964206695557, | |
| "learning_rate": 9.657601252201135e-05, | |
| "loss": 0.3002, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 163.0001189591078, | |
| "eval_accuracy": 0.8645934390800154, | |
| "eval_loss": 1.5464845895767212, | |
| "eval_macro_f1": 0.6708201336285708, | |
| "eval_runtime": 47.8382, | |
| "eval_samples_per_second": 14.047, | |
| "eval_steps_per_second": 3.512, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 163.0004907063197, | |
| "grad_norm": 2.354179620742798, | |
| "learning_rate": 9.653688123654863e-05, | |
| "loss": 0.3054, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 164.0003568773234, | |
| "grad_norm": 0.7585428953170776, | |
| "learning_rate": 9.64977499510859e-05, | |
| "loss": 0.2979, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 164.0003568773234, | |
| "eval_accuracy": 0.8666105756362361, | |
| "eval_loss": 1.6061186790466309, | |
| "eval_macro_f1": 0.6768795029988373, | |
| "eval_runtime": 47.6396, | |
| "eval_samples_per_second": 14.106, | |
| "eval_steps_per_second": 3.526, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 165.00022304832714, | |
| "grad_norm": 0.6630699634552002, | |
| "learning_rate": 9.645861866562317e-05, | |
| "loss": 0.29, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 166.00008921933085, | |
| "grad_norm": 1.263273000717163, | |
| "learning_rate": 9.641948738016044e-05, | |
| "loss": 0.2908, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 166.00008921933085, | |
| "eval_accuracy": 0.8643172326801727, | |
| "eval_loss": 1.6028649806976318, | |
| "eval_macro_f1": 0.6737691674037425, | |
| "eval_runtime": 48.4294, | |
| "eval_samples_per_second": 13.876, | |
| "eval_steps_per_second": 3.469, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 166.00046096654276, | |
| "grad_norm": 1.1215412616729736, | |
| "learning_rate": 9.638035609469771e-05, | |
| "loss": 0.2955, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 167.00032713754646, | |
| "grad_norm": 0.9751584529876709, | |
| "learning_rate": 9.634122480923498e-05, | |
| "loss": 0.2912, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 167.00032713754646, | |
| "eval_accuracy": 0.867831394314885, | |
| "eval_loss": 1.5405371189117432, | |
| "eval_macro_f1": 0.6776042669784504, | |
| "eval_runtime": 47.677, | |
| "eval_samples_per_second": 14.095, | |
| "eval_steps_per_second": 3.524, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 168.0001933085502, | |
| "grad_norm": 0.5147533416748047, | |
| "learning_rate": 9.630209352377226e-05, | |
| "loss": 0.3009, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 169.0000594795539, | |
| "grad_norm": 0.7095732688903809, | |
| "learning_rate": 9.626296223830953e-05, | |
| "loss": 0.2898, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 169.0000594795539, | |
| "eval_accuracy": 0.8701519012721739, | |
| "eval_loss": 1.5441175699234009, | |
| "eval_macro_f1": 0.6844898842204484, | |
| "eval_runtime": 48.1832, | |
| "eval_samples_per_second": 13.947, | |
| "eval_steps_per_second": 3.487, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 169.0004312267658, | |
| "grad_norm": 0.8257178068161011, | |
| "learning_rate": 9.62238309528468e-05, | |
| "loss": 0.2899, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 170.00029739776951, | |
| "grad_norm": 1.3418561220169067, | |
| "learning_rate": 9.618469966738409e-05, | |
| "loss": 0.2907, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 170.00029739776951, | |
| "eval_accuracy": 0.8683436627463533, | |
| "eval_loss": 1.5258067846298218, | |
| "eval_macro_f1": 0.6799004153156964, | |
| "eval_runtime": 48.3802, | |
| "eval_samples_per_second": 13.89, | |
| "eval_steps_per_second": 3.472, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 171.00016356877325, | |
| "grad_norm": 1.1356240510940552, | |
| "learning_rate": 9.614556838192134e-05, | |
| "loss": 0.2839, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 172.00002973977695, | |
| "grad_norm": 2.4852797985076904, | |
| "learning_rate": 9.610643709645863e-05, | |
| "loss": 0.2893, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 172.00002973977695, | |
| "eval_accuracy": 0.8644724453261277, | |
| "eval_loss": 1.5444488525390625, | |
| "eval_macro_f1": 0.6779267308558976, | |
| "eval_runtime": 47.933, | |
| "eval_samples_per_second": 14.02, | |
| "eval_steps_per_second": 3.505, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 172.00040148698884, | |
| "grad_norm": 0.7937450408935547, | |
| "learning_rate": 9.60673058109959e-05, | |
| "loss": 0.2875, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 173.00026765799257, | |
| "grad_norm": 1.3148819208145142, | |
| "learning_rate": 9.602817452553317e-05, | |
| "loss": 0.2848, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 173.00026765799257, | |
| "eval_accuracy": 0.8637417199497863, | |
| "eval_loss": 1.5945580005645752, | |
| "eval_macro_f1": 0.6752905458548558, | |
| "eval_runtime": 48.2353, | |
| "eval_samples_per_second": 13.932, | |
| "eval_steps_per_second": 3.483, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 174.00013382899627, | |
| "grad_norm": 1.375939965248108, | |
| "learning_rate": 9.598904324007044e-05, | |
| "loss": 0.2835, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 174.00050557620818, | |
| "grad_norm": 1.0491206645965576, | |
| "learning_rate": 9.594991195460772e-05, | |
| "loss": 0.2864, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 174.00050557620818, | |
| "eval_accuracy": 0.8652278114733596, | |
| "eval_loss": 1.5291390419006348, | |
| "eval_macro_f1": 0.6776906470006278, | |
| "eval_runtime": 47.1132, | |
| "eval_samples_per_second": 14.264, | |
| "eval_steps_per_second": 3.566, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 175.0003717472119, | |
| "grad_norm": 1.8452578783035278, | |
| "learning_rate": 9.591078066914498e-05, | |
| "loss": 0.2885, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 176.00023791821562, | |
| "grad_norm": 0.6999953985214233, | |
| "learning_rate": 9.587164938368226e-05, | |
| "loss": 0.2896, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 176.00023791821562, | |
| "eval_accuracy": 0.8645915023347069, | |
| "eval_loss": 1.5473123788833618, | |
| "eval_macro_f1": 0.6804909340063208, | |
| "eval_runtime": 48.4286, | |
| "eval_samples_per_second": 13.876, | |
| "eval_steps_per_second": 3.469, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 177.00010408921932, | |
| "grad_norm": 1.5631098747253418, | |
| "learning_rate": 9.583251809821953e-05, | |
| "loss": 0.2836, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 177.00047583643124, | |
| "grad_norm": 1.2296806573867798, | |
| "learning_rate": 9.57933868127568e-05, | |
| "loss": 0.2889, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 177.00047583643124, | |
| "eval_accuracy": 0.8661434049744006, | |
| "eval_loss": 1.6414021253585815, | |
| "eval_macro_f1": 0.6792450700800063, | |
| "eval_runtime": 48.85, | |
| "eval_samples_per_second": 13.756, | |
| "eval_steps_per_second": 3.439, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 178.00034200743494, | |
| "grad_norm": 0.6046510934829712, | |
| "learning_rate": 9.575425552729409e-05, | |
| "loss": 0.2849, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 179.00020817843867, | |
| "grad_norm": 1.4595069885253906, | |
| "learning_rate": 9.571512424183134e-05, | |
| "loss": 0.2821, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 179.00020817843867, | |
| "eval_accuracy": 0.8648826280775495, | |
| "eval_loss": 1.6235281229019165, | |
| "eval_macro_f1": 0.6799601201557365, | |
| "eval_runtime": 48.1603, | |
| "eval_samples_per_second": 13.953, | |
| "eval_steps_per_second": 3.488, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 180.00007434944237, | |
| "grad_norm": 0.7873971462249756, | |
| "learning_rate": 9.567599295636863e-05, | |
| "loss": 0.2828, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 180.0004460966543, | |
| "grad_norm": 0.6876779794692993, | |
| "learning_rate": 9.56368616709059e-05, | |
| "loss": 0.2824, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 180.0004460966543, | |
| "eval_accuracy": 0.8638875731209613, | |
| "eval_loss": 1.5444902181625366, | |
| "eval_macro_f1": 0.67770174790511, | |
| "eval_runtime": 48.1412, | |
| "eval_samples_per_second": 13.959, | |
| "eval_steps_per_second": 3.49, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 181.000312267658, | |
| "grad_norm": 0.6027410626411438, | |
| "learning_rate": 9.559773038544317e-05, | |
| "loss": 0.2794, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 182.00017843866172, | |
| "grad_norm": 0.8014593720436096, | |
| "learning_rate": 9.555859909998044e-05, | |
| "loss": 0.2774, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 182.00017843866172, | |
| "eval_accuracy": 0.8679265122778245, | |
| "eval_loss": 1.644963264465332, | |
| "eval_macro_f1": 0.6823268003968317, | |
| "eval_runtime": 47.9941, | |
| "eval_samples_per_second": 14.002, | |
| "eval_steps_per_second": 3.5, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 183.00004460966542, | |
| "grad_norm": 0.7129560112953186, | |
| "learning_rate": 9.551946781451772e-05, | |
| "loss": 0.2824, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 183.0004163568773, | |
| "grad_norm": 0.695976197719574, | |
| "learning_rate": 9.548033652905498e-05, | |
| "loss": 0.2776, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 183.0004163568773, | |
| "eval_accuracy": 0.8655064591347685, | |
| "eval_loss": 1.7040234804153442, | |
| "eval_macro_f1": 0.6769104108696231, | |
| "eval_runtime": 47.9023, | |
| "eval_samples_per_second": 14.029, | |
| "eval_steps_per_second": 3.507, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 184.00028252788104, | |
| "grad_norm": 0.7102384567260742, | |
| "learning_rate": 9.544120524359226e-05, | |
| "loss": 0.2812, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 185.00014869888477, | |
| "grad_norm": 0.7373831868171692, | |
| "learning_rate": 9.540207395812953e-05, | |
| "loss": 0.2801, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 185.00014869888477, | |
| "eval_accuracy": 0.86286967303127, | |
| "eval_loss": 1.6474841833114624, | |
| "eval_macro_f1": 0.6785408190937435, | |
| "eval_runtime": 48.1272, | |
| "eval_samples_per_second": 13.963, | |
| "eval_steps_per_second": 3.491, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 186.00001486988847, | |
| "grad_norm": 0.5094011425971985, | |
| "learning_rate": 9.53629426726668e-05, | |
| "loss": 0.2803, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 186.00038661710036, | |
| "grad_norm": 0.6538822650909424, | |
| "learning_rate": 9.532381138720407e-05, | |
| "loss": 0.2745, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 186.00038661710036, | |
| "eval_accuracy": 0.8650452479325981, | |
| "eval_loss": 1.5569231510162354, | |
| "eval_macro_f1": 0.677627388562232, | |
| "eval_runtime": 48.1347, | |
| "eval_samples_per_second": 13.961, | |
| "eval_steps_per_second": 3.49, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 187.0002527881041, | |
| "grad_norm": 0.5215421915054321, | |
| "learning_rate": 9.528468010174134e-05, | |
| "loss": 0.2805, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 188.0001189591078, | |
| "grad_norm": 1.654314637184143, | |
| "learning_rate": 9.524554881627862e-05, | |
| "loss": 0.2791, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 188.0001189591078, | |
| "eval_accuracy": 0.863827199940737, | |
| "eval_loss": 1.655357003211975, | |
| "eval_macro_f1": 0.6847853385351034, | |
| "eval_runtime": 47.6816, | |
| "eval_samples_per_second": 14.093, | |
| "eval_steps_per_second": 3.523, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 188.0004907063197, | |
| "grad_norm": 3.5391457080841064, | |
| "learning_rate": 9.52064175308159e-05, | |
| "loss": 0.2811, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 189.0003568773234, | |
| "grad_norm": 0.7586809992790222, | |
| "learning_rate": 9.516728624535316e-05, | |
| "loss": 0.2753, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 189.0003568773234, | |
| "eval_accuracy": 0.8674030325693027, | |
| "eval_loss": 1.5796079635620117, | |
| "eval_macro_f1": 0.6843978905909051, | |
| "eval_runtime": 47.7157, | |
| "eval_samples_per_second": 14.083, | |
| "eval_steps_per_second": 3.521, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 190.00022304832714, | |
| "grad_norm": 1.0755257606506348, | |
| "learning_rate": 9.512815495989043e-05, | |
| "loss": 0.2731, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 191.00008921933085, | |
| "grad_norm": 0.6228829026222229, | |
| "learning_rate": 9.508902367442772e-05, | |
| "loss": 0.2753, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 191.00008921933085, | |
| "eval_accuracy": 0.8665214023561504, | |
| "eval_loss": 1.6560512781143188, | |
| "eval_macro_f1": 0.6846732776298716, | |
| "eval_runtime": 48.0681, | |
| "eval_samples_per_second": 13.98, | |
| "eval_steps_per_second": 3.495, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 191.00046096654276, | |
| "grad_norm": 0.938022792339325, | |
| "learning_rate": 9.504989238896497e-05, | |
| "loss": 0.2709, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 192.00032713754646, | |
| "grad_norm": 0.7752746939659119, | |
| "learning_rate": 9.501076110350226e-05, | |
| "loss": 0.2757, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 192.00032713754646, | |
| "eval_accuracy": 0.8691043353883263, | |
| "eval_loss": 1.572521448135376, | |
| "eval_macro_f1": 0.6884560219258116, | |
| "eval_runtime": 47.3838, | |
| "eval_samples_per_second": 14.182, | |
| "eval_steps_per_second": 3.546, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 193.0001933085502, | |
| "grad_norm": 0.9790377616882324, | |
| "learning_rate": 9.497162981803953e-05, | |
| "loss": 0.2732, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 194.0000594795539, | |
| "grad_norm": 1.399014949798584, | |
| "learning_rate": 9.49324985325768e-05, | |
| "loss": 0.2715, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 194.0000594795539, | |
| "eval_accuracy": 0.8689936532336818, | |
| "eval_loss": 1.643375039100647, | |
| "eval_macro_f1": 0.6917083697924713, | |
| "eval_runtime": 47.6035, | |
| "eval_samples_per_second": 14.117, | |
| "eval_steps_per_second": 3.529, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 194.0004312267658, | |
| "grad_norm": 0.6019532084465027, | |
| "learning_rate": 9.489336724711407e-05, | |
| "loss": 0.2742, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 195.00029739776951, | |
| "grad_norm": 0.8591573238372803, | |
| "learning_rate": 9.485423596165135e-05, | |
| "loss": 0.2743, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 195.00029739776951, | |
| "eval_accuracy": 0.8696440189637991, | |
| "eval_loss": 1.6730353832244873, | |
| "eval_macro_f1": 0.6888831292939925, | |
| "eval_runtime": 47.1438, | |
| "eval_samples_per_second": 14.254, | |
| "eval_steps_per_second": 3.564, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 196.00016356877325, | |
| "grad_norm": 0.7350879907608032, | |
| "learning_rate": 9.481510467618862e-05, | |
| "loss": 0.2734, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 197.00002973977695, | |
| "grad_norm": 0.5490862727165222, | |
| "learning_rate": 9.477597339072589e-05, | |
| "loss": 0.2685, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 197.00002973977695, | |
| "eval_accuracy": 0.869489575266559, | |
| "eval_loss": 1.5646990537643433, | |
| "eval_macro_f1": 0.6848350674152234, | |
| "eval_runtime": 47.4949, | |
| "eval_samples_per_second": 14.149, | |
| "eval_steps_per_second": 3.537, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 197.00040148698884, | |
| "grad_norm": 0.7999712228775024, | |
| "learning_rate": 9.473684210526316e-05, | |
| "loss": 0.2687, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 198.00026765799257, | |
| "grad_norm": 1.20505690574646, | |
| "learning_rate": 9.469771081980043e-05, | |
| "loss": 0.2697, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 198.00026765799257, | |
| "eval_accuracy": 0.8686437550016163, | |
| "eval_loss": 1.5947939157485962, | |
| "eval_macro_f1": 0.6880418599230155, | |
| "eval_runtime": 47.6512, | |
| "eval_samples_per_second": 14.102, | |
| "eval_steps_per_second": 3.526, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 199.00013382899627, | |
| "grad_norm": 0.6366742849349976, | |
| "learning_rate": 9.465857953433772e-05, | |
| "loss": 0.2672, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 199.00050557620818, | |
| "grad_norm": 34.58500289916992, | |
| "learning_rate": 9.461944824887497e-05, | |
| "loss": 0.2794, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 199.00050557620818, | |
| "eval_accuracy": 0.8672534824226665, | |
| "eval_loss": 1.5313513278961182, | |
| "eval_macro_f1": 0.6841001571058899, | |
| "eval_runtime": 45.9918, | |
| "eval_samples_per_second": 14.611, | |
| "eval_steps_per_second": 3.653, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 200.0003717472119, | |
| "grad_norm": 1.101613163948059, | |
| "learning_rate": 9.458031696341226e-05, | |
| "loss": 0.2738, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 201.00023791821562, | |
| "grad_norm": 0.7829649448394775, | |
| "learning_rate": 9.454118567794953e-05, | |
| "loss": 0.2706, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 201.00023791821562, | |
| "eval_accuracy": 0.8678993615662772, | |
| "eval_loss": 1.605455994606018, | |
| "eval_macro_f1": 0.6885769863222782, | |
| "eval_runtime": 47.403, | |
| "eval_samples_per_second": 14.176, | |
| "eval_steps_per_second": 3.544, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 202.00010408921932, | |
| "grad_norm": 0.5832332968711853, | |
| "learning_rate": 9.45020543924868e-05, | |
| "loss": 0.2673, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 202.00047583643124, | |
| "grad_norm": 0.7719833850860596, | |
| "learning_rate": 9.446292310702407e-05, | |
| "loss": 0.266, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 202.00047583643124, | |
| "eval_accuracy": 0.8691502501234343, | |
| "eval_loss": 1.5864771604537964, | |
| "eval_macro_f1": 0.6932054967503338, | |
| "eval_runtime": 47.7144, | |
| "eval_samples_per_second": 14.084, | |
| "eval_steps_per_second": 3.521, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 203.00034200743494, | |
| "grad_norm": 0.8644407987594604, | |
| "learning_rate": 9.442379182156135e-05, | |
| "loss": 0.2679, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 204.00020817843867, | |
| "grad_norm": 0.7103060483932495, | |
| "learning_rate": 9.43846605360986e-05, | |
| "loss": 0.2689, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 204.00020817843867, | |
| "eval_accuracy": 0.8688131187854229, | |
| "eval_loss": 1.6281613111495972, | |
| "eval_macro_f1": 0.686904537208796, | |
| "eval_runtime": 46.631, | |
| "eval_samples_per_second": 14.411, | |
| "eval_steps_per_second": 3.603, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 205.00007434944237, | |
| "grad_norm": 1.3666083812713623, | |
| "learning_rate": 9.434552925063589e-05, | |
| "loss": 0.2655, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 205.0004460966543, | |
| "grad_norm": 0.6814075708389282, | |
| "learning_rate": 9.430639796517316e-05, | |
| "loss": 0.2674, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 205.0004460966543, | |
| "eval_accuracy": 0.8685227656776213, | |
| "eval_loss": 1.5942567586898804, | |
| "eval_macro_f1": 0.687446333114446, | |
| "eval_runtime": 47.6458, | |
| "eval_samples_per_second": 14.104, | |
| "eval_steps_per_second": 3.526, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 206.000312267658, | |
| "grad_norm": 0.704296886920929, | |
| "learning_rate": 9.426726667971043e-05, | |
| "loss": 0.2668, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 207.00017843866172, | |
| "grad_norm": 0.8048647046089172, | |
| "learning_rate": 9.422813539424771e-05, | |
| "loss": 0.27, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 207.00017843866172, | |
| "eval_accuracy": 0.8677061769198147, | |
| "eval_loss": 1.6559548377990723, | |
| "eval_macro_f1": 0.6895342478716917, | |
| "eval_runtime": 46.3001, | |
| "eval_samples_per_second": 14.514, | |
| "eval_steps_per_second": 3.629, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 208.00004460966542, | |
| "grad_norm": 1.0088293552398682, | |
| "learning_rate": 9.418900410878497e-05, | |
| "loss": 0.2692, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 208.0004163568773, | |
| "grad_norm": 0.6096265912055969, | |
| "learning_rate": 9.414987282332225e-05, | |
| "loss": 0.264, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 208.0004163568773, | |
| "eval_accuracy": 0.8624095986479511, | |
| "eval_loss": 1.72756028175354, | |
| "eval_macro_f1": 0.6788019065387273, | |
| "eval_runtime": 47.1371, | |
| "eval_samples_per_second": 14.256, | |
| "eval_steps_per_second": 3.564, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 209.00028252788104, | |
| "grad_norm": 0.8049959540367126, | |
| "learning_rate": 9.411074153785952e-05, | |
| "loss": 0.269, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 210.00014869888477, | |
| "grad_norm": 0.405304878950119, | |
| "learning_rate": 9.40716102523968e-05, | |
| "loss": 0.2613, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 210.00014869888477, | |
| "eval_accuracy": 0.8737539228529647, | |
| "eval_loss": 1.6797696352005005, | |
| "eval_macro_f1": 0.68707650553989, | |
| "eval_runtime": 47.3163, | |
| "eval_samples_per_second": 14.202, | |
| "eval_steps_per_second": 3.551, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 211.00001486988847, | |
| "grad_norm": 0.8155346512794495, | |
| "learning_rate": 9.403247896693406e-05, | |
| "loss": 0.2658, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 211.00038661710036, | |
| "grad_norm": 1.3639463186264038, | |
| "learning_rate": 9.399334768147135e-05, | |
| "loss": 0.2623, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 211.00038661710036, | |
| "eval_accuracy": 0.8712119696176184, | |
| "eval_loss": 1.5848718881607056, | |
| "eval_macro_f1": 0.6890734610343825, | |
| "eval_runtime": 46.9022, | |
| "eval_samples_per_second": 14.328, | |
| "eval_steps_per_second": 3.582, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 212.0002527881041, | |
| "grad_norm": 2.1215269565582275, | |
| "learning_rate": 9.39542163960086e-05, | |
| "loss": 0.2632, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 213.0001189591078, | |
| "grad_norm": 2.304811477661133, | |
| "learning_rate": 9.391508511054589e-05, | |
| "loss": 0.2665, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 213.0001189591078, | |
| "eval_accuracy": 0.8663827562423165, | |
| "eval_loss": 1.670615553855896, | |
| "eval_macro_f1": 0.6867730543695441, | |
| "eval_runtime": 47.4401, | |
| "eval_samples_per_second": 14.165, | |
| "eval_steps_per_second": 3.541, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 213.0004907063197, | |
| "grad_norm": 0.5944399833679199, | |
| "learning_rate": 9.387595382508316e-05, | |
| "loss": 0.2623, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 214.0003568773234, | |
| "grad_norm": 0.8482255339622498, | |
| "learning_rate": 9.383682253962043e-05, | |
| "loss": 0.2624, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 214.0003568773234, | |
| "eval_accuracy": 0.8698217804945703, | |
| "eval_loss": 1.6686782836914062, | |
| "eval_macro_f1": 0.6904881778891221, | |
| "eval_runtime": 47.1229, | |
| "eval_samples_per_second": 14.261, | |
| "eval_steps_per_second": 3.565, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 215.00022304832714, | |
| "grad_norm": 0.43856558203697205, | |
| "learning_rate": 9.379769125415771e-05, | |
| "loss": 0.2622, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 216.00008921933085, | |
| "grad_norm": 0.8466907739639282, | |
| "learning_rate": 9.375855996869498e-05, | |
| "loss": 0.2579, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 216.00008921933085, | |
| "eval_accuracy": 0.8698276830776899, | |
| "eval_loss": 1.5432207584381104, | |
| "eval_macro_f1": 0.693393345872542, | |
| "eval_runtime": 47.3775, | |
| "eval_samples_per_second": 14.184, | |
| "eval_steps_per_second": 3.546, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 216.00046096654276, | |
| "grad_norm": 1.5707125663757324, | |
| "learning_rate": 9.371942868323225e-05, | |
| "loss": 0.2628, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 217.00032713754646, | |
| "grad_norm": 0.5471277832984924, | |
| "learning_rate": 9.368029739776952e-05, | |
| "loss": 0.2672, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 217.00032713754646, | |
| "eval_accuracy": 0.8655290700380827, | |
| "eval_loss": 1.6244316101074219, | |
| "eval_macro_f1": 0.6921881782988438, | |
| "eval_runtime": 47.6498, | |
| "eval_samples_per_second": 14.103, | |
| "eval_steps_per_second": 3.526, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 218.0001933085502, | |
| "grad_norm": 0.6579570770263672, | |
| "learning_rate": 9.364116611230679e-05, | |
| "loss": 0.2581, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 219.0000594795539, | |
| "grad_norm": 1.0227067470550537, | |
| "learning_rate": 9.360203482684406e-05, | |
| "loss": 0.2673, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 219.0000594795539, | |
| "eval_accuracy": 0.8725138688744463, | |
| "eval_loss": 1.5025845766067505, | |
| "eval_macro_f1": 0.6950262696385097, | |
| "eval_runtime": 47.57, | |
| "eval_samples_per_second": 14.127, | |
| "eval_steps_per_second": 3.532, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 219.0004312267658, | |
| "grad_norm": 0.7601364254951477, | |
| "learning_rate": 9.356290354138135e-05, | |
| "loss": 0.2646, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 220.00029739776951, | |
| "grad_norm": 1.6235822439193726, | |
| "learning_rate": 9.35237722559186e-05, | |
| "loss": 0.2657, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 220.00029739776951, | |
| "eval_accuracy": 0.8726696606327754, | |
| "eval_loss": 1.6585569381713867, | |
| "eval_macro_f1": 0.6917753475326269, | |
| "eval_runtime": 47.7138, | |
| "eval_samples_per_second": 14.084, | |
| "eval_steps_per_second": 3.521, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 221.00016356877325, | |
| "grad_norm": 0.4802096486091614, | |
| "learning_rate": 9.348464097045589e-05, | |
| "loss": 0.2562, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 222.00002973977695, | |
| "grad_norm": 1.0543347597122192, | |
| "learning_rate": 9.344550968499316e-05, | |
| "loss": 0.2598, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 222.00002973977695, | |
| "eval_accuracy": 0.8727395565132572, | |
| "eval_loss": 1.6446582078933716, | |
| "eval_macro_f1": 0.6932911685254688, | |
| "eval_runtime": 47.6711, | |
| "eval_samples_per_second": 14.097, | |
| "eval_steps_per_second": 3.524, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 222.00040148698884, | |
| "grad_norm": 0.8065938949584961, | |
| "learning_rate": 9.340637839953043e-05, | |
| "loss": 0.2599, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 223.00026765799257, | |
| "grad_norm": 0.8375520706176758, | |
| "learning_rate": 9.33672471140677e-05, | |
| "loss": 0.2621, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 223.00026765799257, | |
| "eval_accuracy": 0.8728216475795783, | |
| "eval_loss": 1.6157622337341309, | |
| "eval_macro_f1": 0.6932958030431495, | |
| "eval_runtime": 47.7404, | |
| "eval_samples_per_second": 14.076, | |
| "eval_steps_per_second": 3.519, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 224.00013382899627, | |
| "grad_norm": 0.5836125612258911, | |
| "learning_rate": 9.332811582860498e-05, | |
| "loss": 0.2575, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 224.00050557620818, | |
| "grad_norm": 1.7909495830535889, | |
| "learning_rate": 9.328898454314225e-05, | |
| "loss": 0.256, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 224.00050557620818, | |
| "eval_accuracy": 0.8688527520082964, | |
| "eval_loss": 1.7035082578659058, | |
| "eval_macro_f1": 0.6917661335156897, | |
| "eval_runtime": 46.9397, | |
| "eval_samples_per_second": 14.316, | |
| "eval_steps_per_second": 3.579, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 225.0003717472119, | |
| "grad_norm": 0.9828186631202698, | |
| "learning_rate": 9.324985325767952e-05, | |
| "loss": 0.2583, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 226.00023791821562, | |
| "grad_norm": 0.4985399544239044, | |
| "learning_rate": 9.32107219722168e-05, | |
| "loss": 0.2578, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 226.00023791821562, | |
| "eval_accuracy": 0.8719250847337479, | |
| "eval_loss": 1.5340908765792847, | |
| "eval_macro_f1": 0.6947860278625747, | |
| "eval_runtime": 47.7352, | |
| "eval_samples_per_second": 14.078, | |
| "eval_steps_per_second": 3.519, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 227.00010408921932, | |
| "grad_norm": 0.8708937168121338, | |
| "learning_rate": 9.317159068675406e-05, | |
| "loss": 0.255, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 227.00047583643124, | |
| "grad_norm": 1.0979160070419312, | |
| "learning_rate": 9.313245940129134e-05, | |
| "loss": 0.2552, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 227.00047583643124, | |
| "eval_accuracy": 0.8702558461586529, | |
| "eval_loss": 1.6572105884552002, | |
| "eval_macro_f1": 0.6917563353825633, | |
| "eval_runtime": 47.7184, | |
| "eval_samples_per_second": 14.083, | |
| "eval_steps_per_second": 3.521, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 228.00034200743494, | |
| "grad_norm": 0.43901586532592773, | |
| "learning_rate": 9.30933281158286e-05, | |
| "loss": 0.2572, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 229.00020817843867, | |
| "grad_norm": 0.9299412369728088, | |
| "learning_rate": 9.305419683036588e-05, | |
| "loss": 0.2551, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 229.00020817843867, | |
| "eval_accuracy": 0.8717616878137655, | |
| "eval_loss": 1.606767177581787, | |
| "eval_macro_f1": 0.6934037548534984, | |
| "eval_runtime": 47.3853, | |
| "eval_samples_per_second": 14.182, | |
| "eval_steps_per_second": 3.545, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 230.00007434944237, | |
| "grad_norm": 0.5207256078720093, | |
| "learning_rate": 9.301506554490315e-05, | |
| "loss": 0.2576, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 230.0004460966543, | |
| "grad_norm": 0.6443852782249451, | |
| "learning_rate": 9.297593425944042e-05, | |
| "loss": 0.263, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 230.0004460966543, | |
| "eval_accuracy": 0.8718295420556332, | |
| "eval_loss": 1.744031310081482, | |
| "eval_macro_f1": 0.6939710912655463, | |
| "eval_runtime": 47.6301, | |
| "eval_samples_per_second": 14.109, | |
| "eval_steps_per_second": 3.527, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 231.000312267658, | |
| "grad_norm": 0.6609066724777222, | |
| "learning_rate": 9.29368029739777e-05, | |
| "loss": 0.2591, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 232.00017843866172, | |
| "grad_norm": 0.614456295967102, | |
| "learning_rate": 9.289767168851498e-05, | |
| "loss": 0.2567, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 232.00017843866172, | |
| "eval_accuracy": 0.8708844038546723, | |
| "eval_loss": 1.5864840745925903, | |
| "eval_macro_f1": 0.6954124817053439, | |
| "eval_runtime": 47.5438, | |
| "eval_samples_per_second": 14.134, | |
| "eval_steps_per_second": 3.534, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 233.00004460966542, | |
| "grad_norm": 0.9560025334358215, | |
| "learning_rate": 9.285854040305225e-05, | |
| "loss": 0.2558, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 233.0004163568773, | |
| "grad_norm": 0.9449219703674316, | |
| "learning_rate": 9.281940911758952e-05, | |
| "loss": 0.2538, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 233.0004163568773, | |
| "eval_accuracy": 0.8691010985986889, | |
| "eval_loss": 1.6324706077575684, | |
| "eval_macro_f1": 0.6932624958186178, | |
| "eval_runtime": 47.7799, | |
| "eval_samples_per_second": 14.064, | |
| "eval_steps_per_second": 3.516, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 234.00028252788104, | |
| "grad_norm": 0.6615203022956848, | |
| "learning_rate": 9.278027783212679e-05, | |
| "loss": 0.2562, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 235.00014869888477, | |
| "grad_norm": 0.7397301197052002, | |
| "learning_rate": 9.274114654666406e-05, | |
| "loss": 0.2534, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 235.00014869888477, | |
| "eval_accuracy": 0.8692960057739556, | |
| "eval_loss": 1.5722757577896118, | |
| "eval_macro_f1": 0.692964101120355, | |
| "eval_runtime": 47.7627, | |
| "eval_samples_per_second": 14.07, | |
| "eval_steps_per_second": 3.517, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 236.00001486988847, | |
| "grad_norm": 0.55063796043396, | |
| "learning_rate": 9.270201526120134e-05, | |
| "loss": 0.2542, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 236.00038661710036, | |
| "grad_norm": 0.7421992421150208, | |
| "learning_rate": 9.266288397573861e-05, | |
| "loss": 0.254, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 236.00038661710036, | |
| "eval_accuracy": 0.8703440842902799, | |
| "eval_loss": 1.6220040321350098, | |
| "eval_macro_f1": 0.6946437608735907, | |
| "eval_runtime": 47.4085, | |
| "eval_samples_per_second": 14.175, | |
| "eval_steps_per_second": 3.544, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 237.0002527881041, | |
| "grad_norm": 0.8659176230430603, | |
| "learning_rate": 9.262375269027588e-05, | |
| "loss": 0.2528, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 238.0001189591078, | |
| "grad_norm": 0.7209817171096802, | |
| "learning_rate": 9.258462140481315e-05, | |
| "loss": 0.2509, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 238.0001189591078, | |
| "eval_accuracy": 0.8701781168352621, | |
| "eval_loss": 1.7125494480133057, | |
| "eval_macro_f1": 0.6885334728232957, | |
| "eval_runtime": 47.6343, | |
| "eval_samples_per_second": 14.107, | |
| "eval_steps_per_second": 3.527, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 238.0004907063197, | |
| "grad_norm": 0.5732719898223877, | |
| "learning_rate": 9.254549011935042e-05, | |
| "loss": 0.2507, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 239.0003568773234, | |
| "grad_norm": 0.7329777479171753, | |
| "learning_rate": 9.250635883388769e-05, | |
| "loss": 0.2513, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 239.0003568773234, | |
| "eval_accuracy": 0.8709371142216245, | |
| "eval_loss": 1.701737880706787, | |
| "eval_macro_f1": 0.6957311110906366, | |
| "eval_runtime": 47.2624, | |
| "eval_samples_per_second": 14.218, | |
| "eval_steps_per_second": 3.555, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 240.00022304832714, | |
| "grad_norm": 0.6779870986938477, | |
| "learning_rate": 9.246722754842498e-05, | |
| "loss": 0.2555, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 241.00008921933085, | |
| "grad_norm": 0.9598796963691711, | |
| "learning_rate": 9.242809626296223e-05, | |
| "loss": 0.2567, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 241.00008921933085, | |
| "eval_accuracy": 0.8710340352265105, | |
| "eval_loss": 1.658144235610962, | |
| "eval_macro_f1": 0.6995813339320679, | |
| "eval_runtime": 47.5149, | |
| "eval_samples_per_second": 14.143, | |
| "eval_steps_per_second": 3.536, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 241.00046096654276, | |
| "grad_norm": 0.5498836040496826, | |
| "learning_rate": 9.238896497749952e-05, | |
| "loss": 0.2576, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 242.00032713754646, | |
| "grad_norm": 1.8321760892868042, | |
| "learning_rate": 9.234983369203679e-05, | |
| "loss": 0.2579, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 242.00032713754646, | |
| "eval_accuracy": 0.8703391367743829, | |
| "eval_loss": 1.6368268728256226, | |
| "eval_macro_f1": 0.6961760325639348, | |
| "eval_runtime": 47.3632, | |
| "eval_samples_per_second": 14.188, | |
| "eval_steps_per_second": 3.547, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 243.0001933085502, | |
| "grad_norm": 0.702711820602417, | |
| "learning_rate": 9.231070240657406e-05, | |
| "loss": 0.2579, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 244.0000594795539, | |
| "grad_norm": 0.6555370092391968, | |
| "learning_rate": 9.227157112111134e-05, | |
| "loss": 0.2665, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 244.0000594795539, | |
| "eval_accuracy": 0.86594513616592, | |
| "eval_loss": 1.5685354471206665, | |
| "eval_macro_f1": 0.6927762838387244, | |
| "eval_runtime": 47.4037, | |
| "eval_samples_per_second": 14.176, | |
| "eval_steps_per_second": 3.544, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 244.0004312267658, | |
| "grad_norm": 1.0492157936096191, | |
| "learning_rate": 9.223243983564861e-05, | |
| "loss": 0.2569, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 245.00029739776951, | |
| "grad_norm": 0.9604139924049377, | |
| "learning_rate": 9.219330855018588e-05, | |
| "loss": 0.2625, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 245.00029739776951, | |
| "eval_accuracy": 0.8723870633974999, | |
| "eval_loss": 1.6895173788070679, | |
| "eval_macro_f1": 0.6972317831696635, | |
| "eval_runtime": 46.9034, | |
| "eval_samples_per_second": 14.327, | |
| "eval_steps_per_second": 3.582, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 246.00016356877325, | |
| "grad_norm": 0.5593128204345703, | |
| "learning_rate": 9.215417726472315e-05, | |
| "loss": 0.2577, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 247.00002973977695, | |
| "grad_norm": 0.554602861404419, | |
| "learning_rate": 9.211504597926043e-05, | |
| "loss": 0.2539, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 247.00002973977695, | |
| "eval_accuracy": 0.8695801039488791, | |
| "eval_loss": 1.5884020328521729, | |
| "eval_macro_f1": 0.695830481158142, | |
| "eval_runtime": 47.3906, | |
| "eval_samples_per_second": 14.18, | |
| "eval_steps_per_second": 3.545, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 247.00040148698884, | |
| "grad_norm": 0.44518184661865234, | |
| "learning_rate": 9.207591469379769e-05, | |
| "loss": 0.25, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 248.00026765799257, | |
| "grad_norm": 0.5679718852043152, | |
| "learning_rate": 9.203678340833497e-05, | |
| "loss": 0.252, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 248.00026765799257, | |
| "eval_accuracy": 0.8730579371173861, | |
| "eval_loss": 1.6707909107208252, | |
| "eval_macro_f1": 0.6944050366018404, | |
| "eval_runtime": 47.3149, | |
| "eval_samples_per_second": 14.203, | |
| "eval_steps_per_second": 3.551, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 249.00013382899627, | |
| "grad_norm": 0.7690138220787048, | |
| "learning_rate": 9.199765212287223e-05, | |
| "loss": 0.2525, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 249.00050557620818, | |
| "grad_norm": 0.6587095260620117, | |
| "learning_rate": 9.195852083740951e-05, | |
| "loss": 0.2476, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 249.00050557620818, | |
| "eval_accuracy": 0.8728623895233069, | |
| "eval_loss": 1.628631353378296, | |
| "eval_macro_f1": 0.6983235583063505, | |
| "eval_runtime": 46.8372, | |
| "eval_samples_per_second": 14.348, | |
| "eval_steps_per_second": 3.587, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 250.0003717472119, | |
| "grad_norm": 0.5215486288070679, | |
| "learning_rate": 9.191938955194678e-05, | |
| "loss": 0.2457, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 251.00023791821562, | |
| "grad_norm": 0.7685202956199646, | |
| "learning_rate": 9.188025826648405e-05, | |
| "loss": 0.2486, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 251.00023791821562, | |
| "eval_accuracy": 0.875059372092214, | |
| "eval_loss": 1.6925468444824219, | |
| "eval_macro_f1": 0.6942957860515563, | |
| "eval_runtime": 47.6893, | |
| "eval_samples_per_second": 14.091, | |
| "eval_steps_per_second": 3.523, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 252.00010408921932, | |
| "grad_norm": 0.5681723356246948, | |
| "learning_rate": 9.184112698102134e-05, | |
| "loss": 0.2501, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 252.00047583643124, | |
| "grad_norm": 0.766886293888092, | |
| "learning_rate": 9.180199569555861e-05, | |
| "loss": 0.2503, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 252.00047583643124, | |
| "eval_accuracy": 0.8755406525042265, | |
| "eval_loss": 1.5906827449798584, | |
| "eval_macro_f1": 0.6991550920082847, | |
| "eval_runtime": 47.5816, | |
| "eval_samples_per_second": 14.123, | |
| "eval_steps_per_second": 3.531, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 253.00034200743494, | |
| "grad_norm": 1.2093149423599243, | |
| "learning_rate": 9.176286441009588e-05, | |
| "loss": 0.2485, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 254.00020817843867, | |
| "grad_norm": 0.5527441501617432, | |
| "learning_rate": 9.172373312463315e-05, | |
| "loss": 0.2489, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 254.00020817843867, | |
| "eval_accuracy": 0.8714815968328208, | |
| "eval_loss": 1.7195326089859009, | |
| "eval_macro_f1": 0.6970162423490259, | |
| "eval_runtime": 47.4478, | |
| "eval_samples_per_second": 14.163, | |
| "eval_steps_per_second": 3.541, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 255.00007434944237, | |
| "grad_norm": 0.45948362350463867, | |
| "learning_rate": 9.168460183917043e-05, | |
| "loss": 0.2439, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 255.0004460966543, | |
| "grad_norm": 0.6823854446411133, | |
| "learning_rate": 9.164547055370769e-05, | |
| "loss": 0.2457, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 255.0004460966543, | |
| "eval_accuracy": 0.8765321124358997, | |
| "eval_loss": 1.636742353439331, | |
| "eval_macro_f1": 0.699459918703319, | |
| "eval_runtime": 47.7938, | |
| "eval_samples_per_second": 14.06, | |
| "eval_steps_per_second": 3.515, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 256.000312267658, | |
| "grad_norm": 0.5651408433914185, | |
| "learning_rate": 9.160633926824497e-05, | |
| "loss": 0.2428, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 257.0001784386617, | |
| "grad_norm": 0.7243895530700684, | |
| "learning_rate": 9.156720798278224e-05, | |
| "loss": 0.2517, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 257.0001784386617, | |
| "eval_accuracy": 0.8649081285942019, | |
| "eval_loss": 1.766264796257019, | |
| "eval_macro_f1": 0.6946266382039408, | |
| "eval_runtime": 47.9299, | |
| "eval_samples_per_second": 14.02, | |
| "eval_steps_per_second": 3.505, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 258.00004460966545, | |
| "grad_norm": 0.6043080687522888, | |
| "learning_rate": 9.152807669731951e-05, | |
| "loss": 0.2468, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 258.00041635687734, | |
| "grad_norm": 0.5234358906745911, | |
| "learning_rate": 9.148894541185678e-05, | |
| "loss": 0.2414, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 258.00041635687734, | |
| "eval_accuracy": 0.8765574733951784, | |
| "eval_loss": 1.6841180324554443, | |
| "eval_macro_f1": 0.7006466360850203, | |
| "eval_runtime": 48.1158, | |
| "eval_samples_per_second": 13.966, | |
| "eval_steps_per_second": 3.492, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 259.00028252788104, | |
| "grad_norm": 0.5486935973167419, | |
| "learning_rate": 9.144981412639405e-05, | |
| "loss": 0.245, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 260.00014869888474, | |
| "grad_norm": 0.5566163063049316, | |
| "learning_rate": 9.141068284093132e-05, | |
| "loss": 0.2462, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 260.00014869888474, | |
| "eval_accuracy": 0.8747919852117518, | |
| "eval_loss": 1.6303309202194214, | |
| "eval_macro_f1": 0.7016985634720629, | |
| "eval_runtime": 48.1791, | |
| "eval_samples_per_second": 13.948, | |
| "eval_steps_per_second": 3.487, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 261.0000148698885, | |
| "grad_norm": 0.6687771677970886, | |
| "learning_rate": 9.13715515554686e-05, | |
| "loss": 0.2471, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 261.0003866171004, | |
| "grad_norm": 0.799909234046936, | |
| "learning_rate": 9.133242027000587e-05, | |
| "loss": 0.2433, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 261.0003866171004, | |
| "eval_accuracy": 0.8727416528292241, | |
| "eval_loss": 1.6468901634216309, | |
| "eval_macro_f1": 0.6941630525987106, | |
| "eval_runtime": 47.9851, | |
| "eval_samples_per_second": 14.004, | |
| "eval_steps_per_second": 3.501, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 262.0002527881041, | |
| "grad_norm": 0.40486371517181396, | |
| "learning_rate": 9.129328898454315e-05, | |
| "loss": 0.2451, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 263.0001189591078, | |
| "grad_norm": 0.6486377120018005, | |
| "learning_rate": 9.125415769908043e-05, | |
| "loss": 0.2461, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 263.0001189591078, | |
| "eval_accuracy": 0.871701417327654, | |
| "eval_loss": 1.6379040479660034, | |
| "eval_macro_f1": 0.6962308343355683, | |
| "eval_runtime": 47.5217, | |
| "eval_samples_per_second": 14.141, | |
| "eval_steps_per_second": 3.535, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 263.0004907063197, | |
| "grad_norm": 0.6154794096946716, | |
| "learning_rate": 9.121502641361769e-05, | |
| "loss": 0.2429, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 264.00035687732344, | |
| "grad_norm": 0.868390679359436, | |
| "learning_rate": 9.117589512815497e-05, | |
| "loss": 0.245, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 264.00035687732344, | |
| "eval_accuracy": 0.8723792366151837, | |
| "eval_loss": 1.644251823425293, | |
| "eval_macro_f1": 0.6975282371701368, | |
| "eval_runtime": 47.5594, | |
| "eval_samples_per_second": 14.13, | |
| "eval_steps_per_second": 3.532, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 265.00022304832714, | |
| "grad_norm": 0.9866790771484375, | |
| "learning_rate": 9.113676384269224e-05, | |
| "loss": 0.2421, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 266.00008921933085, | |
| "grad_norm": 0.8919183611869812, | |
| "learning_rate": 9.109763255722951e-05, | |
| "loss": 0.2463, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 266.00008921933085, | |
| "eval_accuracy": 0.876137634952197, | |
| "eval_loss": 1.63772714138031, | |
| "eval_macro_f1": 0.7015320633600683, | |
| "eval_runtime": 47.5587, | |
| "eval_samples_per_second": 14.13, | |
| "eval_steps_per_second": 3.532, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 266.00046096654273, | |
| "grad_norm": 0.6732456684112549, | |
| "learning_rate": 9.105850127176678e-05, | |
| "loss": 0.245, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 267.0003271375465, | |
| "grad_norm": 0.9901390671730042, | |
| "learning_rate": 9.101936998630406e-05, | |
| "loss": 0.2433, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 267.0003271375465, | |
| "eval_accuracy": 0.8707050401772733, | |
| "eval_loss": 1.7547792196273804, | |
| "eval_macro_f1": 0.6976431672619668, | |
| "eval_runtime": 48.0258, | |
| "eval_samples_per_second": 13.992, | |
| "eval_steps_per_second": 3.498, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 268.0001933085502, | |
| "grad_norm": 0.6878373622894287, | |
| "learning_rate": 9.098023870084132e-05, | |
| "loss": 0.2455, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 269.0000594795539, | |
| "grad_norm": 0.7168475985527039, | |
| "learning_rate": 9.09411074153786e-05, | |
| "loss": 0.2398, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 269.0000594795539, | |
| "eval_accuracy": 0.8749312429343421, | |
| "eval_loss": 1.5549407005310059, | |
| "eval_macro_f1": 0.6991405662687679, | |
| "eval_runtime": 47.4439, | |
| "eval_samples_per_second": 14.164, | |
| "eval_steps_per_second": 3.541, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 269.0004312267658, | |
| "grad_norm": 0.4068659245967865, | |
| "learning_rate": 9.090197612991587e-05, | |
| "loss": 0.2422, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 270.00029739776954, | |
| "grad_norm": 0.5065603852272034, | |
| "learning_rate": 9.086284484445314e-05, | |
| "loss": 0.242, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 270.00029739776954, | |
| "eval_accuracy": 0.8761036310202004, | |
| "eval_loss": 1.6925346851348877, | |
| "eval_macro_f1": 0.7020842188896902, | |
| "eval_runtime": 47.8897, | |
| "eval_samples_per_second": 14.032, | |
| "eval_steps_per_second": 3.508, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 271.00016356877325, | |
| "grad_norm": 0.6289934515953064, | |
| "learning_rate": 9.082371355899041e-05, | |
| "loss": 0.245, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 272.00002973977695, | |
| "grad_norm": 0.5134521126747131, | |
| "learning_rate": 9.078458227352768e-05, | |
| "loss": 0.2429, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 272.00002973977695, | |
| "eval_accuracy": 0.8727523810274236, | |
| "eval_loss": 1.7503160238265991, | |
| "eval_macro_f1": 0.6992435638111616, | |
| "eval_runtime": 47.9819, | |
| "eval_samples_per_second": 14.005, | |
| "eval_steps_per_second": 3.501, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 272.00040148698884, | |
| "grad_norm": 0.5185211300849915, | |
| "learning_rate": 9.074545098806497e-05, | |
| "loss": 0.2471, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 273.00026765799254, | |
| "grad_norm": 0.6770671606063843, | |
| "learning_rate": 9.070631970260224e-05, | |
| "loss": 0.2428, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 273.00026765799254, | |
| "eval_accuracy": 0.8747771988285471, | |
| "eval_loss": 1.6137670278549194, | |
| "eval_macro_f1": 0.7017125484073795, | |
| "eval_runtime": 48.0779, | |
| "eval_samples_per_second": 13.977, | |
| "eval_steps_per_second": 3.494, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 274.0001338289963, | |
| "grad_norm": 0.7311360836029053, | |
| "learning_rate": 9.066718841713951e-05, | |
| "loss": 0.242, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 274.0005055762082, | |
| "grad_norm": 0.8545860052108765, | |
| "learning_rate": 9.062805713167678e-05, | |
| "loss": 0.2442, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 274.0005055762082, | |
| "eval_accuracy": 0.875472191083495, | |
| "eval_loss": 1.582758903503418, | |
| "eval_macro_f1": 0.7032866342746882, | |
| "eval_runtime": 47.9354, | |
| "eval_samples_per_second": 14.019, | |
| "eval_steps_per_second": 3.505, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 275.0003717472119, | |
| "grad_norm": 0.5233895182609558, | |
| "learning_rate": 9.058892584621406e-05, | |
| "loss": 0.2428, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 276.0002379182156, | |
| "grad_norm": 0.750148355960846, | |
| "learning_rate": 9.054979456075132e-05, | |
| "loss": 0.25, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 276.0002379182156, | |
| "eval_accuracy": 0.8717148881461516, | |
| "eval_loss": 1.7209230661392212, | |
| "eval_macro_f1": 0.6960070669466294, | |
| "eval_runtime": 47.9374, | |
| "eval_samples_per_second": 14.018, | |
| "eval_steps_per_second": 3.505, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 277.00010408921935, | |
| "grad_norm": 0.6850331425666809, | |
| "learning_rate": 9.05106632752886e-05, | |
| "loss": 0.2445, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 277.00047583643124, | |
| "grad_norm": 1.3163009881973267, | |
| "learning_rate": 9.047153198982587e-05, | |
| "loss": 0.2441, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 277.00047583643124, | |
| "eval_accuracy": 0.8765440978351273, | |
| "eval_loss": 1.6306798458099365, | |
| "eval_macro_f1": 0.705625688490632, | |
| "eval_runtime": 47.7345, | |
| "eval_samples_per_second": 14.078, | |
| "eval_steps_per_second": 3.519, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 278.00034200743494, | |
| "grad_norm": 0.591773509979248, | |
| "learning_rate": 9.043240070436314e-05, | |
| "loss": 0.2416, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 279.00020817843864, | |
| "grad_norm": 0.4475375711917877, | |
| "learning_rate": 9.039326941890041e-05, | |
| "loss": 0.2414, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 279.00020817843864, | |
| "eval_accuracy": 0.8760741830973673, | |
| "eval_loss": 1.6609206199645996, | |
| "eval_macro_f1": 0.7028777819955179, | |
| "eval_runtime": 48.2216, | |
| "eval_samples_per_second": 13.936, | |
| "eval_steps_per_second": 3.484, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 280.0000743494424, | |
| "grad_norm": 0.6211313009262085, | |
| "learning_rate": 9.035413813343768e-05, | |
| "loss": 0.2366, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 280.0004460966543, | |
| "grad_norm": 0.7641573548316956, | |
| "learning_rate": 9.031500684797496e-05, | |
| "loss": 0.2371, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 280.0004460966543, | |
| "eval_accuracy": 0.8740981031647922, | |
| "eval_loss": 1.7170416116714478, | |
| "eval_macro_f1": 0.7018808537743648, | |
| "eval_runtime": 48.2987, | |
| "eval_samples_per_second": 13.913, | |
| "eval_steps_per_second": 3.478, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 281.000312267658, | |
| "grad_norm": 0.6113197207450867, | |
| "learning_rate": 9.027587556251223e-05, | |
| "loss": 0.2395, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 282.0001784386617, | |
| "grad_norm": 0.7073433995246887, | |
| "learning_rate": 9.02367442770495e-05, | |
| "loss": 0.2385, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 282.0001784386617, | |
| "eval_accuracy": 0.8743977079113086, | |
| "eval_loss": 1.7015947103500366, | |
| "eval_macro_f1": 0.7018147516994673, | |
| "eval_runtime": 47.6214, | |
| "eval_samples_per_second": 14.111, | |
| "eval_steps_per_second": 3.528, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 283.00004460966545, | |
| "grad_norm": 0.5074424147605896, | |
| "learning_rate": 9.019761299158677e-05, | |
| "loss": 0.2446, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 283.00041635687734, | |
| "grad_norm": 0.6375361084938049, | |
| "learning_rate": 9.015848170612406e-05, | |
| "loss": 0.2385, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 283.00041635687734, | |
| "eval_accuracy": 0.8740577076537963, | |
| "eval_loss": 1.6805062294006348, | |
| "eval_macro_f1": 0.7021029920673127, | |
| "eval_runtime": 47.3156, | |
| "eval_samples_per_second": 14.203, | |
| "eval_steps_per_second": 3.551, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 284.00028252788104, | |
| "grad_norm": 0.8452440500259399, | |
| "learning_rate": 9.011935042066132e-05, | |
| "loss": 0.2373, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 285.00014869888474, | |
| "grad_norm": 0.6203405857086182, | |
| "learning_rate": 9.00802191351986e-05, | |
| "loss": 0.2422, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 285.00014869888474, | |
| "eval_accuracy": 0.8719450365356796, | |
| "eval_loss": 1.74236261844635, | |
| "eval_macro_f1": 0.6972832445042872, | |
| "eval_runtime": 47.5763, | |
| "eval_samples_per_second": 14.125, | |
| "eval_steps_per_second": 3.531, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 286.0000148698885, | |
| "grad_norm": 0.6126111745834351, | |
| "learning_rate": 9.004108784973587e-05, | |
| "loss": 0.2386, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 286.0003866171004, | |
| "grad_norm": 0.4864870011806488, | |
| "learning_rate": 9.000195656427314e-05, | |
| "loss": 0.2403, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 286.0003866171004, | |
| "eval_accuracy": 0.8724967535271402, | |
| "eval_loss": 1.7255445718765259, | |
| "eval_macro_f1": 0.6975033096469039, | |
| "eval_runtime": 47.0772, | |
| "eval_samples_per_second": 14.274, | |
| "eval_steps_per_second": 3.569, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 287.0002527881041, | |
| "grad_norm": 1.169779658317566, | |
| "learning_rate": 8.996282527881041e-05, | |
| "loss": 0.2394, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 288.0001189591078, | |
| "grad_norm": 0.7006259560585022, | |
| "learning_rate": 8.992369399334769e-05, | |
| "loss": 0.2439, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 288.0001189591078, | |
| "eval_accuracy": 0.8719169307254028, | |
| "eval_loss": 1.7644731998443604, | |
| "eval_macro_f1": 0.6958908876410843, | |
| "eval_runtime": 46.9308, | |
| "eval_samples_per_second": 14.319, | |
| "eval_steps_per_second": 3.58, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 288.0004907063197, | |
| "grad_norm": 0.7097400426864624, | |
| "learning_rate": 8.988456270788496e-05, | |
| "loss": 0.2468, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 289.00035687732344, | |
| "grad_norm": 0.7856957912445068, | |
| "learning_rate": 8.984543142242223e-05, | |
| "loss": 0.2414, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 289.00035687732344, | |
| "eval_accuracy": 0.8756746554351618, | |
| "eval_loss": 1.7827283143997192, | |
| "eval_macro_f1": 0.7025011619593726, | |
| "eval_runtime": 47.4623, | |
| "eval_samples_per_second": 14.159, | |
| "eval_steps_per_second": 3.54, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 290.00022304832714, | |
| "grad_norm": 0.739522397518158, | |
| "learning_rate": 8.98063001369595e-05, | |
| "loss": 0.2357, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 291.00008921933085, | |
| "grad_norm": 0.5940971970558167, | |
| "learning_rate": 8.976716885149677e-05, | |
| "loss": 0.2403, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 291.00008921933085, | |
| "eval_accuracy": 0.8785574074303582, | |
| "eval_loss": 1.5922495126724243, | |
| "eval_macro_f1": 0.7042178263465622, | |
| "eval_runtime": 47.3707, | |
| "eval_samples_per_second": 14.186, | |
| "eval_steps_per_second": 3.546, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 291.00046096654273, | |
| "grad_norm": 0.4905942678451538, | |
| "learning_rate": 8.972803756603406e-05, | |
| "loss": 0.2342, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 292.0003271375465, | |
| "grad_norm": 0.6634092926979065, | |
| "learning_rate": 8.968890628057131e-05, | |
| "loss": 0.233, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 292.0003271375465, | |
| "eval_accuracy": 0.8748785909310688, | |
| "eval_loss": 1.8774493932724, | |
| "eval_macro_f1": 0.699269934433628, | |
| "eval_runtime": 47.8531, | |
| "eval_samples_per_second": 14.043, | |
| "eval_steps_per_second": 3.511, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 293.0001933085502, | |
| "grad_norm": 0.6518082618713379, | |
| "learning_rate": 8.96497749951086e-05, | |
| "loss": 0.2355, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 294.0000594795539, | |
| "grad_norm": 0.6070118546485901, | |
| "learning_rate": 8.961064370964587e-05, | |
| "loss": 0.2357, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 294.0000594795539, | |
| "eval_accuracy": 0.8755963691803673, | |
| "eval_loss": 1.6461971998214722, | |
| "eval_macro_f1": 0.6989364797944599, | |
| "eval_runtime": 47.0949, | |
| "eval_samples_per_second": 14.269, | |
| "eval_steps_per_second": 3.567, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 294.0004312267658, | |
| "grad_norm": 0.9214955568313599, | |
| "learning_rate": 8.957151242418314e-05, | |
| "loss": 0.2373, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 295.00029739776954, | |
| "grad_norm": 0.7139339447021484, | |
| "learning_rate": 8.95323811387204e-05, | |
| "loss": 0.2468, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 295.00029739776954, | |
| "eval_accuracy": 0.8752588183006401, | |
| "eval_loss": 1.6570920944213867, | |
| "eval_macro_f1": 0.7040971519065213, | |
| "eval_runtime": 47.3271, | |
| "eval_samples_per_second": 14.199, | |
| "eval_steps_per_second": 3.55, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 296.00016356877325, | |
| "grad_norm": 0.8615701794624329, | |
| "learning_rate": 8.949324985325769e-05, | |
| "loss": 0.2359, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 297.00002973977695, | |
| "grad_norm": 0.5898351073265076, | |
| "learning_rate": 8.945411856779495e-05, | |
| "loss": 0.236, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 297.00002973977695, | |
| "eval_accuracy": 0.8752191850777665, | |
| "eval_loss": 1.6926116943359375, | |
| "eval_macro_f1": 0.6999326241361957, | |
| "eval_runtime": 47.1963, | |
| "eval_samples_per_second": 14.238, | |
| "eval_steps_per_second": 3.56, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 297.00040148698884, | |
| "grad_norm": 0.7265653014183044, | |
| "learning_rate": 8.941498728233223e-05, | |
| "loss": 0.2431, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 298.00026765799254, | |
| "grad_norm": 0.8821515440940857, | |
| "learning_rate": 8.93758559968695e-05, | |
| "loss": 0.2341, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 298.00026765799254, | |
| "eval_accuracy": 0.8770282183880916, | |
| "eval_loss": 1.7504407167434692, | |
| "eval_macro_f1": 0.7051157111563411, | |
| "eval_runtime": 47.5468, | |
| "eval_samples_per_second": 14.133, | |
| "eval_steps_per_second": 3.533, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 299.0001338289963, | |
| "grad_norm": 0.8511101007461548, | |
| "learning_rate": 8.933672471140677e-05, | |
| "loss": 0.2356, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 299.0005055762082, | |
| "grad_norm": 2.185602903366089, | |
| "learning_rate": 8.929759342594405e-05, | |
| "loss": 0.2359, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 299.0005055762082, | |
| "eval_accuracy": 0.8763592902732993, | |
| "eval_loss": 1.6859861612319946, | |
| "eval_macro_f1": 0.700068017274899, | |
| "eval_runtime": 47.108, | |
| "eval_samples_per_second": 14.265, | |
| "eval_steps_per_second": 3.566, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 300.0003717472119, | |
| "grad_norm": 0.6592544317245483, | |
| "learning_rate": 8.925846214048131e-05, | |
| "loss": 0.2336, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 301.0002379182156, | |
| "grad_norm": 1.0603123903274536, | |
| "learning_rate": 8.92193308550186e-05, | |
| "loss": 0.2377, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 301.0002379182156, | |
| "eval_accuracy": 0.8745820471710939, | |
| "eval_loss": 1.668921709060669, | |
| "eval_macro_f1": 0.6997895576456115, | |
| "eval_runtime": 47.3196, | |
| "eval_samples_per_second": 14.201, | |
| "eval_steps_per_second": 3.55, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 302.00010408921935, | |
| "grad_norm": 0.744284451007843, | |
| "learning_rate": 8.918019956955586e-05, | |
| "loss": 0.2356, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 302.00047583643124, | |
| "grad_norm": 0.5185156464576721, | |
| "learning_rate": 8.914106828409313e-05, | |
| "loss": 0.2336, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 302.00047583643124, | |
| "eval_accuracy": 0.8787366418342881, | |
| "eval_loss": 1.6256543397903442, | |
| "eval_macro_f1": 0.7081033424785741, | |
| "eval_runtime": 48.159, | |
| "eval_samples_per_second": 13.954, | |
| "eval_steps_per_second": 3.488, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 303.00034200743494, | |
| "grad_norm": 0.48090434074401855, | |
| "learning_rate": 8.91019369986304e-05, | |
| "loss": 0.2319, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 304.00020817843864, | |
| "grad_norm": 0.5701866149902344, | |
| "learning_rate": 8.906280571316769e-05, | |
| "loss": 0.2329, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 304.00020817843864, | |
| "eval_accuracy": 0.8759103023549644, | |
| "eval_loss": 1.680335283279419, | |
| "eval_macro_f1": 0.7017996446225558, | |
| "eval_runtime": 47.2644, | |
| "eval_samples_per_second": 14.218, | |
| "eval_steps_per_second": 3.554, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 305.0000743494424, | |
| "grad_norm": 0.6104369759559631, | |
| "learning_rate": 8.902367442770494e-05, | |
| "loss": 0.2359, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 305.0004460966543, | |
| "grad_norm": 0.6198025941848755, | |
| "learning_rate": 8.898454314224223e-05, | |
| "loss": 0.236, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 305.0004460966543, | |
| "eval_accuracy": 0.8783429369856264, | |
| "eval_loss": 1.7152197360992432, | |
| "eval_macro_f1": 0.7087897186605305, | |
| "eval_runtime": 47.7992, | |
| "eval_samples_per_second": 14.059, | |
| "eval_steps_per_second": 3.515, | |
| "step": 20800 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 134500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 20, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |