TounsiLM-8b / checkpoint-1584 /trainer_state.json
alabenayed's picture
Upload folder using huggingface_hub
4d65edc verified
Raw
History Blame
46.4 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 200,
"global_step": 1584,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 2.4575500309467317,
"epoch": 0.012630249447426587,
"grad_norm": 4.916348934173584,
"learning_rate": 1.8750000000000003e-06,
"loss": 3.6598,
"mean_token_accuracy": 0.4153611570596695,
"num_tokens": 59642.0,
"step": 10
},
{
"entropy": 2.4072387635707857,
"epoch": 0.025260498894853173,
"grad_norm": 3.8026137351989746,
"learning_rate": 3.958333333333333e-06,
"loss": 3.3603,
"mean_token_accuracy": 0.4350100517272949,
"num_tokens": 119219.0,
"step": 20
},
{
"entropy": 2.3899864494800567,
"epoch": 0.03789074834227976,
"grad_norm": 3.7880399227142334,
"learning_rate": 6.041666666666667e-06,
"loss": 2.9434,
"mean_token_accuracy": 0.4788561977446079,
"num_tokens": 179590.0,
"step": 30
},
{
"entropy": 2.1122478008270265,
"epoch": 0.05052099778970635,
"grad_norm": 3.0592074394226074,
"learning_rate": 8.125000000000001e-06,
"loss": 2.3919,
"mean_token_accuracy": 0.574567300081253,
"num_tokens": 238845.0,
"step": 40
},
{
"entropy": 1.7037649989128112,
"epoch": 0.06315124723713293,
"grad_norm": 1.5836262702941895,
"learning_rate": 9.993489583333334e-06,
"loss": 1.912,
"mean_token_accuracy": 0.6467478528618813,
"num_tokens": 298317.0,
"step": 50
},
{
"entropy": 1.5623225390911102,
"epoch": 0.07578149668455952,
"grad_norm": 1.217679738998413,
"learning_rate": 9.928385416666668e-06,
"loss": 1.6762,
"mean_token_accuracy": 0.679128734767437,
"num_tokens": 357858.0,
"step": 60
},
{
"entropy": 1.5071247130632401,
"epoch": 0.0884117461319861,
"grad_norm": 0.973615288734436,
"learning_rate": 9.863281250000001e-06,
"loss": 1.5372,
"mean_token_accuracy": 0.6943170607089997,
"num_tokens": 418834.0,
"step": 70
},
{
"entropy": 1.4568549275398255,
"epoch": 0.1010419955794127,
"grad_norm": 0.9853116869926453,
"learning_rate": 9.798177083333335e-06,
"loss": 1.4751,
"mean_token_accuracy": 0.7024633795022964,
"num_tokens": 478960.0,
"step": 80
},
{
"entropy": 1.4889154583215714,
"epoch": 0.11367224502683929,
"grad_norm": 0.9147132039070129,
"learning_rate": 9.733072916666667e-06,
"loss": 1.474,
"mean_token_accuracy": 0.6996816232800483,
"num_tokens": 541795.0,
"step": 90
},
{
"entropy": 1.4158774405717849,
"epoch": 0.12630249447426586,
"grad_norm": 0.9684887528419495,
"learning_rate": 9.66796875e-06,
"loss": 1.3805,
"mean_token_accuracy": 0.7165829420089722,
"num_tokens": 601174.0,
"step": 100
},
{
"entropy": 1.4276181221008302,
"epoch": 0.13893274392169244,
"grad_norm": 0.9440239667892456,
"learning_rate": 9.602864583333335e-06,
"loss": 1.3718,
"mean_token_accuracy": 0.7143253713846207,
"num_tokens": 661048.0,
"step": 110
},
{
"entropy": 1.4359370201826096,
"epoch": 0.15156299336911905,
"grad_norm": 0.8779081702232361,
"learning_rate": 9.537760416666667e-06,
"loss": 1.3661,
"mean_token_accuracy": 0.7162409156560898,
"num_tokens": 722298.0,
"step": 120
},
{
"entropy": 1.3943599790334702,
"epoch": 0.16419324281654563,
"grad_norm": 0.8999291062355042,
"learning_rate": 9.47265625e-06,
"loss": 1.3193,
"mean_token_accuracy": 0.7252198755741119,
"num_tokens": 782683.0,
"step": 130
},
{
"entropy": 1.3758090347051621,
"epoch": 0.1768234922639722,
"grad_norm": 0.8218080997467041,
"learning_rate": 9.407552083333334e-06,
"loss": 1.3054,
"mean_token_accuracy": 0.7277572214603424,
"num_tokens": 842988.0,
"step": 140
},
{
"entropy": 1.381770172715187,
"epoch": 0.1894537417113988,
"grad_norm": 0.8062577843666077,
"learning_rate": 9.342447916666668e-06,
"loss": 1.3291,
"mean_token_accuracy": 0.7222751513123512,
"num_tokens": 903912.0,
"step": 150
},
{
"entropy": 1.352141672372818,
"epoch": 0.2020839911588254,
"grad_norm": 0.8221862316131592,
"learning_rate": 9.277343750000001e-06,
"loss": 1.2974,
"mean_token_accuracy": 0.7260218441486359,
"num_tokens": 964887.0,
"step": 160
},
{
"entropy": 1.346352329850197,
"epoch": 0.21471424060625197,
"grad_norm": 0.7375346422195435,
"learning_rate": 9.212239583333335e-06,
"loss": 1.2969,
"mean_token_accuracy": 0.7252495244145394,
"num_tokens": 1026887.0,
"step": 170
},
{
"entropy": 1.3165962457656861,
"epoch": 0.22734449005367857,
"grad_norm": 0.7950690388679504,
"learning_rate": 9.147135416666667e-06,
"loss": 1.2824,
"mean_token_accuracy": 0.7250601649284363,
"num_tokens": 1086995.0,
"step": 180
},
{
"entropy": 1.3047442227602004,
"epoch": 0.23997473950110515,
"grad_norm": 0.7147737145423889,
"learning_rate": 9.082031250000001e-06,
"loss": 1.2628,
"mean_token_accuracy": 0.7318986386060715,
"num_tokens": 1147209.0,
"step": 190
},
{
"entropy": 1.2989415228366852,
"epoch": 0.25260498894853173,
"grad_norm": 0.756094753742218,
"learning_rate": 9.016927083333335e-06,
"loss": 1.2484,
"mean_token_accuracy": 0.7319697335362434,
"num_tokens": 1207602.0,
"step": 200
},
{
"entropy": 1.2904020875692368,
"epoch": 0.2652352383959583,
"grad_norm": 0.7715655565261841,
"learning_rate": 8.951822916666667e-06,
"loss": 1.2447,
"mean_token_accuracy": 0.7349080622196198,
"num_tokens": 1267500.0,
"step": 210
},
{
"entropy": 1.2543610483407974,
"epoch": 0.2778654878433849,
"grad_norm": 0.6824166774749756,
"learning_rate": 8.88671875e-06,
"loss": 1.2111,
"mean_token_accuracy": 0.7386362582445145,
"num_tokens": 1327666.0,
"step": 220
},
{
"entropy": 1.2946221768856048,
"epoch": 0.2904957372908115,
"grad_norm": 0.6559598445892334,
"learning_rate": 8.821614583333334e-06,
"loss": 1.2574,
"mean_token_accuracy": 0.7287471711635589,
"num_tokens": 1389712.0,
"step": 230
},
{
"entropy": 1.2489944666624069,
"epoch": 0.3031259867382381,
"grad_norm": 0.7000382542610168,
"learning_rate": 8.756510416666666e-06,
"loss": 1.2092,
"mean_token_accuracy": 0.7372458636760711,
"num_tokens": 1448670.0,
"step": 240
},
{
"entropy": 1.2534994542598725,
"epoch": 0.3157562361856647,
"grad_norm": 0.6579836010932922,
"learning_rate": 8.69140625e-06,
"loss": 1.2132,
"mean_token_accuracy": 0.7380462676286698,
"num_tokens": 1508428.0,
"step": 250
},
{
"entropy": 1.2474523901939392,
"epoch": 0.32838648563309125,
"grad_norm": 0.6546089053153992,
"learning_rate": 8.626302083333334e-06,
"loss": 1.2103,
"mean_token_accuracy": 0.7395781621336937,
"num_tokens": 1568018.0,
"step": 260
},
{
"entropy": 1.2445458561182021,
"epoch": 0.34101673508051783,
"grad_norm": 0.6377413868904114,
"learning_rate": 8.561197916666667e-06,
"loss": 1.2007,
"mean_token_accuracy": 0.7419240340590477,
"num_tokens": 1627904.0,
"step": 270
},
{
"entropy": 1.279063493013382,
"epoch": 0.3536469845279444,
"grad_norm": 0.6460844278335571,
"learning_rate": 8.496093750000001e-06,
"loss": 1.2497,
"mean_token_accuracy": 0.729638360440731,
"num_tokens": 1689637.0,
"step": 280
},
{
"entropy": 1.2362476408481597,
"epoch": 0.366277233975371,
"grad_norm": 0.6648440361022949,
"learning_rate": 8.430989583333335e-06,
"loss": 1.2091,
"mean_token_accuracy": 0.7385585099458695,
"num_tokens": 1749861.0,
"step": 290
},
{
"entropy": 1.2533661901950837,
"epoch": 0.3789074834227976,
"grad_norm": 0.6637682318687439,
"learning_rate": 8.365885416666667e-06,
"loss": 1.2163,
"mean_token_accuracy": 0.7371826618909836,
"num_tokens": 1810407.0,
"step": 300
},
{
"entropy": 1.2383619010448457,
"epoch": 0.3915377328702242,
"grad_norm": 0.660043478012085,
"learning_rate": 8.30078125e-06,
"loss": 1.2026,
"mean_token_accuracy": 0.7364327058196067,
"num_tokens": 1871544.0,
"step": 310
},
{
"entropy": 1.2316229462623596,
"epoch": 0.4041679823176508,
"grad_norm": 0.6285788416862488,
"learning_rate": 8.235677083333334e-06,
"loss": 1.2064,
"mean_token_accuracy": 0.7371214032173157,
"num_tokens": 1932125.0,
"step": 320
},
{
"entropy": 1.2459111303091048,
"epoch": 0.41679823176507735,
"grad_norm": 0.6204569339752197,
"learning_rate": 8.170572916666666e-06,
"loss": 1.1997,
"mean_token_accuracy": 0.7365512102842331,
"num_tokens": 1993924.0,
"step": 330
},
{
"entropy": 1.2156363114714623,
"epoch": 0.42942848121250393,
"grad_norm": 0.6501284241676331,
"learning_rate": 8.10546875e-06,
"loss": 1.1863,
"mean_token_accuracy": 0.741255110502243,
"num_tokens": 2054496.0,
"step": 340
},
{
"entropy": 1.2222040683031081,
"epoch": 0.4420587306599305,
"grad_norm": 0.602418065071106,
"learning_rate": 8.040364583333334e-06,
"loss": 1.1913,
"mean_token_accuracy": 0.739654652774334,
"num_tokens": 2114825.0,
"step": 350
},
{
"entropy": 1.2437947690486908,
"epoch": 0.45468898010735714,
"grad_norm": 0.6289706230163574,
"learning_rate": 7.975260416666668e-06,
"loss": 1.2142,
"mean_token_accuracy": 0.7374308854341507,
"num_tokens": 2176058.0,
"step": 360
},
{
"entropy": 1.2139764934778214,
"epoch": 0.4673192295547837,
"grad_norm": 0.6439516544342041,
"learning_rate": 7.910156250000001e-06,
"loss": 1.1769,
"mean_token_accuracy": 0.7426491379737854,
"num_tokens": 2236783.0,
"step": 370
},
{
"entropy": 1.19720456302166,
"epoch": 0.4799494790022103,
"grad_norm": 0.6499606966972351,
"learning_rate": 7.845052083333335e-06,
"loss": 1.1829,
"mean_token_accuracy": 0.7399616882205009,
"num_tokens": 2298432.0,
"step": 380
},
{
"entropy": 1.205560651421547,
"epoch": 0.4925797284496369,
"grad_norm": 0.6545577645301819,
"learning_rate": 7.779947916666667e-06,
"loss": 1.1577,
"mean_token_accuracy": 0.7463845536112785,
"num_tokens": 2357808.0,
"step": 390
},
{
"entropy": 1.19621299803257,
"epoch": 0.5052099778970635,
"grad_norm": 0.6930111050605774,
"learning_rate": 7.71484375e-06,
"loss": 1.1583,
"mean_token_accuracy": 0.7453805327415466,
"num_tokens": 2417574.0,
"step": 400
},
{
"entropy": 1.1963690370321274,
"epoch": 0.5178402273444901,
"grad_norm": 0.648593544960022,
"learning_rate": 7.649739583333334e-06,
"loss": 1.1723,
"mean_token_accuracy": 0.7415376961231231,
"num_tokens": 2478088.0,
"step": 410
},
{
"entropy": 1.216522666811943,
"epoch": 0.5304704767919166,
"grad_norm": 0.6348926424980164,
"learning_rate": 7.5846354166666665e-06,
"loss": 1.1701,
"mean_token_accuracy": 0.7432737082242966,
"num_tokens": 2538612.0,
"step": 420
},
{
"entropy": 1.1990931153297424,
"epoch": 0.5431007262393432,
"grad_norm": 0.627249002456665,
"learning_rate": 7.51953125e-06,
"loss": 1.1688,
"mean_token_accuracy": 0.7435364574193954,
"num_tokens": 2599023.0,
"step": 430
},
{
"entropy": 1.1872963696718215,
"epoch": 0.5557309756867698,
"grad_norm": 0.6614134311676025,
"learning_rate": 7.454427083333334e-06,
"loss": 1.1622,
"mean_token_accuracy": 0.7470521196722985,
"num_tokens": 2658338.0,
"step": 440
},
{
"entropy": 1.215770760178566,
"epoch": 0.5683612251341964,
"grad_norm": 0.6228342652320862,
"learning_rate": 7.389322916666667e-06,
"loss": 1.1898,
"mean_token_accuracy": 0.7409805700182914,
"num_tokens": 2719316.0,
"step": 450
},
{
"entropy": 1.1998004853725432,
"epoch": 0.580991474581623,
"grad_norm": 0.6525698304176331,
"learning_rate": 7.3242187500000006e-06,
"loss": 1.167,
"mean_token_accuracy": 0.7438512742519379,
"num_tokens": 2780272.0,
"step": 460
},
{
"entropy": 1.1898580551147462,
"epoch": 0.5936217240290496,
"grad_norm": 0.6669884324073792,
"learning_rate": 7.259114583333334e-06,
"loss": 1.1669,
"mean_token_accuracy": 0.7437147945165634,
"num_tokens": 2840261.0,
"step": 470
},
{
"entropy": 1.21882204413414,
"epoch": 0.6062519734764762,
"grad_norm": 0.6129422783851624,
"learning_rate": 7.194010416666667e-06,
"loss": 1.177,
"mean_token_accuracy": 0.7423913896083831,
"num_tokens": 2901347.0,
"step": 480
},
{
"entropy": 1.163309469819069,
"epoch": 0.6188822229239027,
"grad_norm": 0.6334741115570068,
"learning_rate": 7.128906250000001e-06,
"loss": 1.1393,
"mean_token_accuracy": 0.7511255607008934,
"num_tokens": 2960518.0,
"step": 490
},
{
"entropy": 1.1994746267795562,
"epoch": 0.6315124723713293,
"grad_norm": 0.6261829733848572,
"learning_rate": 7.063802083333335e-06,
"loss": 1.1605,
"mean_token_accuracy": 0.7433080047369003,
"num_tokens": 3021957.0,
"step": 500
},
{
"entropy": 1.167793545126915,
"epoch": 0.6441427218187559,
"grad_norm": 0.5909908413887024,
"learning_rate": 6.998697916666667e-06,
"loss": 1.1468,
"mean_token_accuracy": 0.7475745663046837,
"num_tokens": 3083301.0,
"step": 510
},
{
"entropy": 1.1670663714408875,
"epoch": 0.6567729712661825,
"grad_norm": 0.6018249988555908,
"learning_rate": 6.93359375e-06,
"loss": 1.1425,
"mean_token_accuracy": 0.7485125616192818,
"num_tokens": 3143187.0,
"step": 520
},
{
"entropy": 1.1626142784953117,
"epoch": 0.6694032207136091,
"grad_norm": 0.6088816523551941,
"learning_rate": 6.868489583333334e-06,
"loss": 1.1297,
"mean_token_accuracy": 0.7490727782249451,
"num_tokens": 3202489.0,
"step": 530
},
{
"entropy": 1.1758243769407273,
"epoch": 0.6820334701610357,
"grad_norm": 0.6021592020988464,
"learning_rate": 6.803385416666667e-06,
"loss": 1.1656,
"mean_token_accuracy": 0.7443674057722092,
"num_tokens": 3263476.0,
"step": 540
},
{
"entropy": 1.179671287536621,
"epoch": 0.6946637196084623,
"grad_norm": 0.5955655574798584,
"learning_rate": 6.738281250000001e-06,
"loss": 1.1385,
"mean_token_accuracy": 0.7481714516878128,
"num_tokens": 3324008.0,
"step": 550
},
{
"entropy": 1.1886188358068466,
"epoch": 0.7072939690558888,
"grad_norm": 0.6246835589408875,
"learning_rate": 6.6731770833333345e-06,
"loss": 1.1607,
"mean_token_accuracy": 0.7447509884834289,
"num_tokens": 3383861.0,
"step": 560
},
{
"entropy": 1.1690475821495057,
"epoch": 0.7199242185033154,
"grad_norm": 0.606743335723877,
"learning_rate": 6.6080729166666665e-06,
"loss": 1.1298,
"mean_token_accuracy": 0.7493681326508522,
"num_tokens": 3443946.0,
"step": 570
},
{
"entropy": 1.1725697651505471,
"epoch": 0.732554467950742,
"grad_norm": 0.6846170425415039,
"learning_rate": 6.54296875e-06,
"loss": 1.1452,
"mean_token_accuracy": 0.7482522815465927,
"num_tokens": 3503787.0,
"step": 580
},
{
"entropy": 1.1713406786322593,
"epoch": 0.7451847173981686,
"grad_norm": 0.6522074341773987,
"learning_rate": 6.477864583333334e-06,
"loss": 1.1338,
"mean_token_accuracy": 0.7498400524258614,
"num_tokens": 3563403.0,
"step": 590
},
{
"entropy": 1.1848436295986176,
"epoch": 0.7578149668455952,
"grad_norm": 0.6417824625968933,
"learning_rate": 6.412760416666667e-06,
"loss": 1.1499,
"mean_token_accuracy": 0.7452719643712044,
"num_tokens": 3625007.0,
"step": 600
},
{
"entropy": 1.1822121858596801,
"epoch": 0.7704452162930218,
"grad_norm": 0.6329619884490967,
"learning_rate": 6.3476562500000006e-06,
"loss": 1.159,
"mean_token_accuracy": 0.7452733591198921,
"num_tokens": 3686099.0,
"step": 610
},
{
"entropy": 1.190292978286743,
"epoch": 0.7830754657404484,
"grad_norm": 0.6627410054206848,
"learning_rate": 6.282552083333334e-06,
"loss": 1.1558,
"mean_token_accuracy": 0.7438480347394943,
"num_tokens": 3747233.0,
"step": 620
},
{
"entropy": 1.1619529083371163,
"epoch": 0.7957057151878749,
"grad_norm": 0.5941329002380371,
"learning_rate": 6.217447916666667e-06,
"loss": 1.1377,
"mean_token_accuracy": 0.7503219902515411,
"num_tokens": 3807833.0,
"step": 630
},
{
"entropy": 1.1658748656511306,
"epoch": 0.8083359646353016,
"grad_norm": 0.6438832879066467,
"learning_rate": 6.152343750000001e-06,
"loss": 1.1397,
"mean_token_accuracy": 0.7471553102135658,
"num_tokens": 3868549.0,
"step": 640
},
{
"entropy": 1.1782082825899125,
"epoch": 0.8209662140827282,
"grad_norm": 0.6389635801315308,
"learning_rate": 6.087239583333335e-06,
"loss": 1.1434,
"mean_token_accuracy": 0.7477709770202636,
"num_tokens": 3929057.0,
"step": 650
},
{
"entropy": 1.1625961899757384,
"epoch": 0.8335964635301547,
"grad_norm": 0.6134201288223267,
"learning_rate": 6.022135416666667e-06,
"loss": 1.1352,
"mean_token_accuracy": 0.748055274784565,
"num_tokens": 3990676.0,
"step": 660
},
{
"entropy": 1.1510928481817246,
"epoch": 0.8462267129775813,
"grad_norm": 0.6336613893508911,
"learning_rate": 5.95703125e-06,
"loss": 1.1182,
"mean_token_accuracy": 0.7524245917797089,
"num_tokens": 4051046.0,
"step": 670
},
{
"entropy": 1.1498646020889283,
"epoch": 0.8588569624250079,
"grad_norm": 0.6758144497871399,
"learning_rate": 5.891927083333334e-06,
"loss": 1.1186,
"mean_token_accuracy": 0.7507978692650795,
"num_tokens": 4111084.0,
"step": 680
},
{
"entropy": 1.167962297797203,
"epoch": 0.8714872118724345,
"grad_norm": 0.6285990476608276,
"learning_rate": 5.826822916666667e-06,
"loss": 1.1395,
"mean_token_accuracy": 0.7476246923208236,
"num_tokens": 4172628.0,
"step": 690
},
{
"entropy": 1.1178194358944893,
"epoch": 0.884117461319861,
"grad_norm": 0.64762282371521,
"learning_rate": 5.761718750000001e-06,
"loss": 1.0919,
"mean_token_accuracy": 0.7569874793291091,
"num_tokens": 4231821.0,
"step": 700
},
{
"entropy": 1.1606462925672532,
"epoch": 0.8967477107672877,
"grad_norm": 0.6292758584022522,
"learning_rate": 5.6966145833333344e-06,
"loss": 1.1354,
"mean_token_accuracy": 0.750880953669548,
"num_tokens": 4292646.0,
"step": 710
},
{
"entropy": 1.1580617666244506,
"epoch": 0.9093779602147143,
"grad_norm": 0.6393706798553467,
"learning_rate": 5.6315104166666665e-06,
"loss": 1.1205,
"mean_token_accuracy": 0.7499566927552224,
"num_tokens": 4353199.0,
"step": 720
},
{
"entropy": 1.1515695974230766,
"epoch": 0.9220082096621408,
"grad_norm": 0.687380313873291,
"learning_rate": 5.56640625e-06,
"loss": 1.1138,
"mean_token_accuracy": 0.7514134287834168,
"num_tokens": 4414122.0,
"step": 730
},
{
"entropy": 1.1574165880680085,
"epoch": 0.9346384591095674,
"grad_norm": 0.6102684736251831,
"learning_rate": 5.501302083333334e-06,
"loss": 1.1302,
"mean_token_accuracy": 0.7507740229368209,
"num_tokens": 4474548.0,
"step": 740
},
{
"entropy": 1.1491190433502196,
"epoch": 0.947268708556994,
"grad_norm": 0.623504638671875,
"learning_rate": 5.436197916666667e-06,
"loss": 1.129,
"mean_token_accuracy": 0.7512574091553688,
"num_tokens": 4534678.0,
"step": 750
},
{
"entropy": 1.1538215219974517,
"epoch": 0.9598989580044206,
"grad_norm": 0.6368807554244995,
"learning_rate": 5.3710937500000005e-06,
"loss": 1.1181,
"mean_token_accuracy": 0.7520082175731659,
"num_tokens": 4594878.0,
"step": 760
},
{
"entropy": 1.1623035803437234,
"epoch": 0.9725292074518471,
"grad_norm": 0.6332852840423584,
"learning_rate": 5.305989583333334e-06,
"loss": 1.1308,
"mean_token_accuracy": 0.7497873172163964,
"num_tokens": 4656513.0,
"step": 770
},
{
"entropy": 1.1483627527952194,
"epoch": 0.9851594568992738,
"grad_norm": 0.6341389417648315,
"learning_rate": 5.240885416666667e-06,
"loss": 1.1142,
"mean_token_accuracy": 0.7533516198396683,
"num_tokens": 4717111.0,
"step": 780
},
{
"entropy": 1.1455359414219857,
"epoch": 0.9977897063467004,
"grad_norm": 0.6641396880149841,
"learning_rate": 5.17578125e-06,
"loss": 1.1117,
"mean_token_accuracy": 0.7530950620770455,
"num_tokens": 4777713.0,
"step": 790
},
{
"entropy": 1.148778918461922,
"epoch": 1.0101041995579412,
"grad_norm": 0.6454346776008606,
"learning_rate": 5.110677083333334e-06,
"loss": 1.1146,
"mean_token_accuracy": 0.7511914097345792,
"num_tokens": 4837103.0,
"step": 800
},
{
"entropy": 1.1441998034715652,
"epoch": 1.0227344490053678,
"grad_norm": 0.6368332505226135,
"learning_rate": 5.045572916666667e-06,
"loss": 1.1003,
"mean_token_accuracy": 0.7535203993320465,
"num_tokens": 4898715.0,
"step": 810
},
{
"entropy": 1.1195117503404617,
"epoch": 1.0353646984527944,
"grad_norm": 0.6546683311462402,
"learning_rate": 4.98046875e-06,
"loss": 1.0924,
"mean_token_accuracy": 0.7574156150221825,
"num_tokens": 4959681.0,
"step": 820
},
{
"entropy": 1.1403603315353394,
"epoch": 1.047994947900221,
"grad_norm": 0.6645976305007935,
"learning_rate": 4.915364583333333e-06,
"loss": 1.1031,
"mean_token_accuracy": 0.7548869714140892,
"num_tokens": 5020382.0,
"step": 830
},
{
"entropy": 1.1299657106399537,
"epoch": 1.0606251973476477,
"grad_norm": 0.6225126385688782,
"learning_rate": 4.850260416666667e-06,
"loss": 1.0915,
"mean_token_accuracy": 0.7562400087714195,
"num_tokens": 5080360.0,
"step": 840
},
{
"entropy": 1.12370226085186,
"epoch": 1.0732554467950741,
"grad_norm": 0.6478942036628723,
"learning_rate": 4.785156250000001e-06,
"loss": 1.1064,
"mean_token_accuracy": 0.7542634457349777,
"num_tokens": 5140349.0,
"step": 850
},
{
"entropy": 1.1469928681850434,
"epoch": 1.0858856962425008,
"grad_norm": 0.615678608417511,
"learning_rate": 4.7200520833333336e-06,
"loss": 1.1043,
"mean_token_accuracy": 0.7529336720705032,
"num_tokens": 5201690.0,
"step": 860
},
{
"entropy": 1.137891921401024,
"epoch": 1.0985159456899274,
"grad_norm": 0.6458525061607361,
"learning_rate": 4.654947916666667e-06,
"loss": 1.1081,
"mean_token_accuracy": 0.7543051362037658,
"num_tokens": 5261698.0,
"step": 870
},
{
"entropy": 1.1202880129218102,
"epoch": 1.111146195137354,
"grad_norm": 0.6362131237983704,
"learning_rate": 4.58984375e-06,
"loss": 1.0951,
"mean_token_accuracy": 0.7552427321672439,
"num_tokens": 5321775.0,
"step": 880
},
{
"entropy": 1.1365787714719773,
"epoch": 1.1237764445847804,
"grad_norm": 0.6511764526367188,
"learning_rate": 4.524739583333334e-06,
"loss": 1.0961,
"mean_token_accuracy": 0.7562274217605591,
"num_tokens": 5383140.0,
"step": 890
},
{
"entropy": 1.1074503496289254,
"epoch": 1.136406694032207,
"grad_norm": 0.6207822561264038,
"learning_rate": 4.459635416666668e-06,
"loss": 1.0848,
"mean_token_accuracy": 0.7591574639081955,
"num_tokens": 5443006.0,
"step": 900
},
{
"entropy": 1.1545074522495269,
"epoch": 1.1490369434796337,
"grad_norm": 0.6404831409454346,
"learning_rate": 4.3945312500000005e-06,
"loss": 1.1121,
"mean_token_accuracy": 0.7507721096277237,
"num_tokens": 5503942.0,
"step": 910
},
{
"entropy": 1.1401477769017219,
"epoch": 1.1616671929270603,
"grad_norm": 0.6468749046325684,
"learning_rate": 4.329427083333333e-06,
"loss": 1.1011,
"mean_token_accuracy": 0.753543746471405,
"num_tokens": 5564518.0,
"step": 920
},
{
"entropy": 1.0945423126220704,
"epoch": 1.174297442374487,
"grad_norm": 0.6418051719665527,
"learning_rate": 4.264322916666667e-06,
"loss": 1.0614,
"mean_token_accuracy": 0.7643799662590027,
"num_tokens": 5624109.0,
"step": 930
},
{
"entropy": 1.1136713281273842,
"epoch": 1.1869276918219134,
"grad_norm": 0.6422064304351807,
"learning_rate": 4.19921875e-06,
"loss": 1.0974,
"mean_token_accuracy": 0.7561314895749092,
"num_tokens": 5684801.0,
"step": 940
},
{
"entropy": 1.1215770334005355,
"epoch": 1.19955794126934,
"grad_norm": 0.6453995108604431,
"learning_rate": 4.134114583333334e-06,
"loss": 1.0801,
"mean_token_accuracy": 0.7590720430016518,
"num_tokens": 5745499.0,
"step": 950
},
{
"entropy": 1.1010483756661416,
"epoch": 1.2121881907167666,
"grad_norm": 0.61696857213974,
"learning_rate": 4.0690104166666675e-06,
"loss": 1.049,
"mean_token_accuracy": 0.7627070844173431,
"num_tokens": 5806117.0,
"step": 960
},
{
"entropy": 1.1082940384745599,
"epoch": 1.2248184401641933,
"grad_norm": 0.6523500680923462,
"learning_rate": 4.00390625e-06,
"loss": 1.0807,
"mean_token_accuracy": 0.7579552844166756,
"num_tokens": 5865537.0,
"step": 970
},
{
"entropy": 1.102595229446888,
"epoch": 1.23744868961162,
"grad_norm": 0.6376118063926697,
"learning_rate": 3.938802083333333e-06,
"loss": 1.0679,
"mean_token_accuracy": 0.7592279806733131,
"num_tokens": 5925254.0,
"step": 980
},
{
"entropy": 1.1277900233864784,
"epoch": 1.2500789390590463,
"grad_norm": 0.6571747660636902,
"learning_rate": 3.873697916666667e-06,
"loss": 1.0888,
"mean_token_accuracy": 0.7549166217446327,
"num_tokens": 5986084.0,
"step": 990
},
{
"entropy": 1.113915103673935,
"epoch": 1.262709188506473,
"grad_norm": 0.6531611084938049,
"learning_rate": 3.8085937500000002e-06,
"loss": 1.0718,
"mean_token_accuracy": 0.7577856734395028,
"num_tokens": 6046857.0,
"step": 1000
},
{
"entropy": 1.0966202467679977,
"epoch": 1.2753394379538996,
"grad_norm": 0.636698842048645,
"learning_rate": 3.7434895833333336e-06,
"loss": 1.0699,
"mean_token_accuracy": 0.7601938605308532,
"num_tokens": 6106886.0,
"step": 1010
},
{
"entropy": 1.1121985822916032,
"epoch": 1.2879696874013262,
"grad_norm": 0.6492161750793457,
"learning_rate": 3.6783854166666673e-06,
"loss": 1.0851,
"mean_token_accuracy": 0.7588792949914932,
"num_tokens": 6167935.0,
"step": 1020
},
{
"entropy": 1.1355163961648942,
"epoch": 1.3005999368487529,
"grad_norm": 0.6697131395339966,
"learning_rate": 3.61328125e-06,
"loss": 1.094,
"mean_token_accuracy": 0.754327917098999,
"num_tokens": 6228870.0,
"step": 1030
},
{
"entropy": 1.11816665828228,
"epoch": 1.3132301862961793,
"grad_norm": 0.6773020625114441,
"learning_rate": 3.5481770833333335e-06,
"loss": 1.0893,
"mean_token_accuracy": 0.7571294933557511,
"num_tokens": 6288847.0,
"step": 1040
},
{
"entropy": 1.1343947052955627,
"epoch": 1.325860435743606,
"grad_norm": 0.6566488146781921,
"learning_rate": 3.483072916666667e-06,
"loss": 1.0875,
"mean_token_accuracy": 0.755756102502346,
"num_tokens": 6350161.0,
"step": 1050
},
{
"entropy": 1.1109364911913873,
"epoch": 1.3384906851910325,
"grad_norm": 0.6575057506561279,
"learning_rate": 3.41796875e-06,
"loss": 1.0782,
"mean_token_accuracy": 0.7591001376509666,
"num_tokens": 6410972.0,
"step": 1060
},
{
"entropy": 1.1165167808532714,
"epoch": 1.3511209346384592,
"grad_norm": 0.6655089259147644,
"learning_rate": 3.3528645833333334e-06,
"loss": 1.0901,
"mean_token_accuracy": 0.7573199763894081,
"num_tokens": 6471984.0,
"step": 1070
},
{
"entropy": 1.1066906094551086,
"epoch": 1.3637511840858858,
"grad_norm": 0.6363748908042908,
"learning_rate": 3.287760416666667e-06,
"loss": 1.0716,
"mean_token_accuracy": 0.7598252177238465,
"num_tokens": 6532514.0,
"step": 1080
},
{
"entropy": 1.1047193810343743,
"epoch": 1.3763814335333122,
"grad_norm": 0.6684281826019287,
"learning_rate": 3.2226562500000004e-06,
"loss": 1.0823,
"mean_token_accuracy": 0.7593759268522262,
"num_tokens": 6592949.0,
"step": 1090
},
{
"entropy": 1.1348285049200058,
"epoch": 1.3890116829807388,
"grad_norm": 0.6439023017883301,
"learning_rate": 3.1575520833333333e-06,
"loss": 1.1031,
"mean_token_accuracy": 0.7526842474937439,
"num_tokens": 6654231.0,
"step": 1100
},
{
"entropy": 1.1191302105784415,
"epoch": 1.4016419324281655,
"grad_norm": 0.6556984186172485,
"learning_rate": 3.092447916666667e-06,
"loss": 1.0799,
"mean_token_accuracy": 0.7590983435511589,
"num_tokens": 6714430.0,
"step": 1110
},
{
"entropy": 1.093433029949665,
"epoch": 1.4142721818755921,
"grad_norm": 0.6618829965591431,
"learning_rate": 3.0273437500000003e-06,
"loss": 1.0614,
"mean_token_accuracy": 0.7611085593700408,
"num_tokens": 6774176.0,
"step": 1120
},
{
"entropy": 1.135184645652771,
"epoch": 1.4269024313230187,
"grad_norm": 0.6382298469543457,
"learning_rate": 2.962239583333333e-06,
"loss": 1.0939,
"mean_token_accuracy": 0.7532851651310921,
"num_tokens": 6836522.0,
"step": 1130
},
{
"entropy": 1.1093149304389953,
"epoch": 1.4395326807704452,
"grad_norm": 0.6382166147232056,
"learning_rate": 2.897135416666667e-06,
"loss": 1.0709,
"mean_token_accuracy": 0.7608326107263566,
"num_tokens": 6896353.0,
"step": 1140
},
{
"entropy": 1.1047044202685357,
"epoch": 1.4521629302178718,
"grad_norm": 0.6356373429298401,
"learning_rate": 2.8320312500000002e-06,
"loss": 1.0738,
"mean_token_accuracy": 0.7615469440817833,
"num_tokens": 6956828.0,
"step": 1150
},
{
"entropy": 1.1073317646980285,
"epoch": 1.4647931796652984,
"grad_norm": 0.6593008041381836,
"learning_rate": 2.7669270833333335e-06,
"loss": 1.0589,
"mean_token_accuracy": 0.7599197804927826,
"num_tokens": 7017026.0,
"step": 1160
},
{
"entropy": 1.0851576775312424,
"epoch": 1.4774234291127248,
"grad_norm": 0.6466282606124878,
"learning_rate": 2.7018229166666673e-06,
"loss": 1.0584,
"mean_token_accuracy": 0.7626572713255882,
"num_tokens": 7076806.0,
"step": 1170
},
{
"entropy": 1.1103300124406814,
"epoch": 1.4900536785601517,
"grad_norm": 0.6285493969917297,
"learning_rate": 2.63671875e-06,
"loss": 1.0753,
"mean_token_accuracy": 0.7593718692660332,
"num_tokens": 7137946.0,
"step": 1180
},
{
"entropy": 1.1066975593566895,
"epoch": 1.502683928007578,
"grad_norm": 0.6664257645606995,
"learning_rate": 2.5716145833333334e-06,
"loss": 1.0642,
"mean_token_accuracy": 0.7612839996814728,
"num_tokens": 7200103.0,
"step": 1190
},
{
"entropy": 1.0994308680295943,
"epoch": 1.5153141774550047,
"grad_norm": 0.683022141456604,
"learning_rate": 2.506510416666667e-06,
"loss": 1.0726,
"mean_token_accuracy": 0.7611020535230637,
"num_tokens": 7259051.0,
"step": 1200
},
{
"entropy": 1.1130555748939515,
"epoch": 1.5279444269024314,
"grad_norm": 0.6556797623634338,
"learning_rate": 2.44140625e-06,
"loss": 1.0758,
"mean_token_accuracy": 0.7601210430264473,
"num_tokens": 7318904.0,
"step": 1210
},
{
"entropy": 1.112100276350975,
"epoch": 1.5405746763498578,
"grad_norm": 0.6336252689361572,
"learning_rate": 2.3763020833333338e-06,
"loss": 1.0691,
"mean_token_accuracy": 0.7613141894340515,
"num_tokens": 7378611.0,
"step": 1220
},
{
"entropy": 1.1018309980630874,
"epoch": 1.5532049257972846,
"grad_norm": 0.6907696723937988,
"learning_rate": 2.3111979166666667e-06,
"loss": 1.074,
"mean_token_accuracy": 0.7605119064450264,
"num_tokens": 7438960.0,
"step": 1230
},
{
"entropy": 1.071268692612648,
"epoch": 1.565835175244711,
"grad_norm": 0.6463876962661743,
"learning_rate": 2.2460937500000004e-06,
"loss": 1.0413,
"mean_token_accuracy": 0.7670892596244812,
"num_tokens": 7497275.0,
"step": 1240
},
{
"entropy": 1.134592017531395,
"epoch": 1.5784654246921377,
"grad_norm": 0.6366226077079773,
"learning_rate": 2.1809895833333337e-06,
"loss": 1.092,
"mean_token_accuracy": 0.7548690542578698,
"num_tokens": 7558285.0,
"step": 1250
},
{
"entropy": 1.1141762882471085,
"epoch": 1.5910956741395643,
"grad_norm": 0.6590870022773743,
"learning_rate": 2.1158854166666666e-06,
"loss": 1.0752,
"mean_token_accuracy": 0.7603784337639808,
"num_tokens": 7620287.0,
"step": 1260
},
{
"entropy": 1.09154414832592,
"epoch": 1.6037259235869907,
"grad_norm": 0.656830370426178,
"learning_rate": 2.0507812500000003e-06,
"loss": 1.0593,
"mean_token_accuracy": 0.76341772377491,
"num_tokens": 7681170.0,
"step": 1270
},
{
"entropy": 1.10728869587183,
"epoch": 1.6163561730344176,
"grad_norm": 0.6511245965957642,
"learning_rate": 1.9856770833333336e-06,
"loss": 1.0724,
"mean_token_accuracy": 0.7592613711953163,
"num_tokens": 7741125.0,
"step": 1280
},
{
"entropy": 1.1026839420199395,
"epoch": 1.628986422481844,
"grad_norm": 0.648682713508606,
"learning_rate": 1.920572916666667e-06,
"loss": 1.0678,
"mean_token_accuracy": 0.7615165829658508,
"num_tokens": 7801002.0,
"step": 1290
},
{
"entropy": 1.113681361079216,
"epoch": 1.6416166719292706,
"grad_norm": 0.6691455245018005,
"learning_rate": 1.8554687500000002e-06,
"loss": 1.0662,
"mean_token_accuracy": 0.76031324416399,
"num_tokens": 7861077.0,
"step": 1300
},
{
"entropy": 1.0854344859719276,
"epoch": 1.6542469213766973,
"grad_norm": 0.6221432685852051,
"learning_rate": 1.7903645833333335e-06,
"loss": 1.0564,
"mean_token_accuracy": 0.7638715595006943,
"num_tokens": 7920955.0,
"step": 1310
},
{
"entropy": 1.1128123462200166,
"epoch": 1.6668771708241237,
"grad_norm": 0.6376025080680847,
"learning_rate": 1.7252604166666668e-06,
"loss": 1.0803,
"mean_token_accuracy": 0.7579856783151626,
"num_tokens": 7981933.0,
"step": 1320
},
{
"entropy": 1.105194841325283,
"epoch": 1.6795074202715503,
"grad_norm": 0.712565541267395,
"learning_rate": 1.6601562500000001e-06,
"loss": 1.0758,
"mean_token_accuracy": 0.7577270165085792,
"num_tokens": 8042084.0,
"step": 1330
},
{
"entropy": 1.1065697744488716,
"epoch": 1.692137669718977,
"grad_norm": 0.6228471994400024,
"learning_rate": 1.5950520833333336e-06,
"loss": 1.0731,
"mean_token_accuracy": 0.7590463057160377,
"num_tokens": 8102976.0,
"step": 1340
},
{
"entropy": 1.1064435616135597,
"epoch": 1.7047679191664036,
"grad_norm": 0.6447433829307556,
"learning_rate": 1.5299479166666667e-06,
"loss": 1.0708,
"mean_token_accuracy": 0.7608707517385482,
"num_tokens": 8163206.0,
"step": 1350
},
{
"entropy": 1.1046179130673408,
"epoch": 1.7173981686138302,
"grad_norm": 0.6594550609588623,
"learning_rate": 1.46484375e-06,
"loss": 1.0606,
"mean_token_accuracy": 0.7627649754285812,
"num_tokens": 8224905.0,
"step": 1360
},
{
"entropy": 1.1129515051841736,
"epoch": 1.7300284180612566,
"grad_norm": 0.6550594568252563,
"learning_rate": 1.3997395833333335e-06,
"loss": 1.0741,
"mean_token_accuracy": 0.7577028945088387,
"num_tokens": 8286587.0,
"step": 1370
},
{
"entropy": 1.094475807249546,
"epoch": 1.7426586675086833,
"grad_norm": 0.6420894265174866,
"learning_rate": 1.3346354166666666e-06,
"loss": 1.0597,
"mean_token_accuracy": 0.7612502560019493,
"num_tokens": 8346283.0,
"step": 1380
},
{
"entropy": 1.1026990562677383,
"epoch": 1.75528891695611,
"grad_norm": 0.6647622585296631,
"learning_rate": 1.2695312500000002e-06,
"loss": 1.0705,
"mean_token_accuracy": 0.7592111378908157,
"num_tokens": 8406590.0,
"step": 1390
},
{
"entropy": 1.1024970307946205,
"epoch": 1.7679191664035365,
"grad_norm": 0.6625591516494751,
"learning_rate": 1.2044270833333335e-06,
"loss": 1.0783,
"mean_token_accuracy": 0.756389918923378,
"num_tokens": 8467230.0,
"step": 1400
},
{
"entropy": 1.1201951175928115,
"epoch": 1.7805494158509632,
"grad_norm": 0.6827495098114014,
"learning_rate": 1.1393229166666668e-06,
"loss": 1.0869,
"mean_token_accuracy": 0.756199948489666,
"num_tokens": 8527820.0,
"step": 1410
},
{
"entropy": 1.1082668006420135,
"epoch": 1.7931796652983896,
"grad_norm": 0.6496292948722839,
"learning_rate": 1.07421875e-06,
"loss": 1.0638,
"mean_token_accuracy": 0.7621515318751335,
"num_tokens": 8587544.0,
"step": 1420
},
{
"entropy": 1.0907854005694388,
"epoch": 1.8058099147458162,
"grad_norm": 0.6577737927436829,
"learning_rate": 1.0091145833333334e-06,
"loss": 1.0587,
"mean_token_accuracy": 0.7625794589519501,
"num_tokens": 8647424.0,
"step": 1430
},
{
"entropy": 1.0947823762893676,
"epoch": 1.8184401641932428,
"grad_norm": 0.6546240448951721,
"learning_rate": 9.440104166666668e-07,
"loss": 1.0648,
"mean_token_accuracy": 0.7618604898452759,
"num_tokens": 8706635.0,
"step": 1440
},
{
"entropy": 1.1016521960496903,
"epoch": 1.8310704136406692,
"grad_norm": 0.6349791884422302,
"learning_rate": 8.789062500000001e-07,
"loss": 1.0615,
"mean_token_accuracy": 0.7619734451174736,
"num_tokens": 8766624.0,
"step": 1450
},
{
"entropy": 1.110970026254654,
"epoch": 1.843700663088096,
"grad_norm": 0.6273230314254761,
"learning_rate": 8.138020833333334e-07,
"loss": 1.079,
"mean_token_accuracy": 0.7600797146558762,
"num_tokens": 8826556.0,
"step": 1460
},
{
"entropy": 1.1075817868113518,
"epoch": 1.8563309125355225,
"grad_norm": 0.6720101833343506,
"learning_rate": 7.486979166666668e-07,
"loss": 1.0695,
"mean_token_accuracy": 0.759764339029789,
"num_tokens": 8887460.0,
"step": 1470
},
{
"entropy": 1.0957570180296898,
"epoch": 1.8689611619829491,
"grad_norm": 0.6578065752983093,
"learning_rate": 6.835937500000001e-07,
"loss": 1.0658,
"mean_token_accuracy": 0.7630386680364609,
"num_tokens": 8947077.0,
"step": 1480
},
{
"entropy": 1.1054737836122512,
"epoch": 1.8815914114303758,
"grad_norm": 0.6177386045455933,
"learning_rate": 6.184895833333334e-07,
"loss": 1.062,
"mean_token_accuracy": 0.7603132933378219,
"num_tokens": 9008717.0,
"step": 1490
},
{
"entropy": 1.1026621460914612,
"epoch": 1.8942216608778022,
"grad_norm": 0.6226282119750977,
"learning_rate": 5.533854166666667e-07,
"loss": 1.0706,
"mean_token_accuracy": 0.7603669881820678,
"num_tokens": 9068623.0,
"step": 1500
},
{
"entropy": 1.1010279595851897,
"epoch": 1.906851910325229,
"grad_norm": 0.6504780650138855,
"learning_rate": 4.8828125e-07,
"loss": 1.0583,
"mean_token_accuracy": 0.7632956698536872,
"num_tokens": 9129086.0,
"step": 1510
},
{
"entropy": 1.1021641314029693,
"epoch": 1.9194821597726555,
"grad_norm": 0.667875349521637,
"learning_rate": 4.2317708333333337e-07,
"loss": 1.0698,
"mean_token_accuracy": 0.7609776973724365,
"num_tokens": 9189845.0,
"step": 1520
},
{
"entropy": 1.0909265503287315,
"epoch": 1.932112409220082,
"grad_norm": 0.650221586227417,
"learning_rate": 3.5807291666666667e-07,
"loss": 1.0541,
"mean_token_accuracy": 0.7616324663162232,
"num_tokens": 9250808.0,
"step": 1530
},
{
"entropy": 1.1204875528812408,
"epoch": 1.9447426586675087,
"grad_norm": 0.6560048460960388,
"learning_rate": 2.9296875000000003e-07,
"loss": 1.0821,
"mean_token_accuracy": 0.7588548183441162,
"num_tokens": 9312275.0,
"step": 1540
},
{
"entropy": 1.0873224779963493,
"epoch": 1.9573729081149351,
"grad_norm": 0.6481816172599792,
"learning_rate": 2.2786458333333333e-07,
"loss": 1.058,
"mean_token_accuracy": 0.762654073536396,
"num_tokens": 9372216.0,
"step": 1550
},
{
"entropy": 1.1072645708918571,
"epoch": 1.970003157562362,
"grad_norm": 0.6645349264144897,
"learning_rate": 1.627604166666667e-07,
"loss": 1.07,
"mean_token_accuracy": 0.7614389002323151,
"num_tokens": 9431986.0,
"step": 1560
},
{
"entropy": 1.1167670994997025,
"epoch": 1.9826334070097884,
"grad_norm": 0.6455146670341492,
"learning_rate": 9.765625e-08,
"loss": 1.0754,
"mean_token_accuracy": 0.7594234853982925,
"num_tokens": 9492961.0,
"step": 1570
},
{
"entropy": 1.104009985923767,
"epoch": 1.995263656457215,
"grad_norm": 0.6205505132675171,
"learning_rate": 3.2552083333333335e-08,
"loss": 1.0718,
"mean_token_accuracy": 0.7597839057445526,
"num_tokens": 9553806.0,
"step": 1580
}
],
"logging_steps": 10,
"max_steps": 1584,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.4237850982977536e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}