{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997740696510989, "eval_steps": 100, "global_step": 968, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0309278350515464e-06, "logits/chosen": -4.324154853820801, "logits/rejected": -4.269870758056641, "logps/chosen": -367.06219482421875, "logps/rejected": -317.6511535644531, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.0309278350515464e-05, "logits/chosen": -4.277936935424805, "logits/rejected": -4.137242317199707, "logps/chosen": -423.301513671875, "logps/rejected": -322.6667175292969, "loss": 0.6944, "rewards/accuracies": 0.4340277910232544, "rewards/chosen": -0.002464266261085868, "rewards/margins": -0.0019812113605439663, "rewards/rejected": -0.0004830548132304102, "step": 10 }, { "epoch": 0.02, "learning_rate": 2.0618556701030927e-05, "logits/chosen": -4.263987064361572, "logits/rejected": -4.143830299377441, "logps/chosen": -392.2227783203125, "logps/rejected": -317.55157470703125, "loss": 0.6908, "rewards/accuracies": 0.5390625, "rewards/chosen": 0.008146456442773342, "rewards/margins": 0.0054251509718596935, "rewards/rejected": 0.0027213054709136486, "step": 20 }, { "epoch": 0.03, "learning_rate": 3.0927835051546395e-05, "logits/chosen": -4.266225337982178, "logits/rejected": -4.155703544616699, "logps/chosen": -406.10809326171875, "logps/rejected": -324.8602600097656, "loss": 0.6854, "rewards/accuracies": 0.578125, "rewards/chosen": 0.043196842074394226, "rewards/margins": 0.017555737867951393, "rewards/rejected": 0.025641104206442833, "step": 30 }, { "epoch": 0.04, "learning_rate": 4.1237113402061855e-05, "logits/chosen": -4.249855041503906, "logits/rejected": -4.156338691711426, "logps/chosen": -375.7052307128906, "logps/rejected": -312.56158447265625, "loss": 0.6671, "rewards/accuracies": 0.6265624761581421, "rewards/chosen": 0.1265665739774704, "rewards/margins": 0.060109008103609085, "rewards/rejected": 0.06645756959915161, "step": 40 }, { "epoch": 0.05, "learning_rate": 5.1546391752577315e-05, "logits/chosen": -4.269591331481934, "logits/rejected": -4.160136699676514, "logps/chosen": -395.27069091796875, "logps/rejected": -313.8091735839844, "loss": 0.6334, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.21795189380645752, "rewards/margins": 0.15998400747776031, "rewards/rejected": 0.05796787887811661, "step": 50 }, { "epoch": 0.06, "learning_rate": 6.185567010309279e-05, "logits/chosen": -4.272132396697998, "logits/rejected": -4.149416446685791, "logps/chosen": -395.2691955566406, "logps/rejected": -319.5643615722656, "loss": 0.6073, "rewards/accuracies": 0.667187511920929, "rewards/chosen": 0.3055054843425751, "rewards/margins": 0.24846644699573517, "rewards/rejected": 0.0570390410721302, "step": 60 }, { "epoch": 0.07, "learning_rate": 7.216494845360825e-05, "logits/chosen": -4.253750324249268, "logits/rejected": -4.135117530822754, "logps/chosen": -399.1761169433594, "logps/rejected": -302.0652160644531, "loss": 0.5963, "rewards/accuracies": 0.6875, "rewards/chosen": 0.2813766598701477, "rewards/margins": 0.33464515209198, "rewards/rejected": -0.05326848104596138, "step": 70 }, { "epoch": 0.08, "learning_rate": 8.247422680412371e-05, "logits/chosen": -4.241095542907715, "logits/rejected": -4.122071743011475, "logps/chosen": -419.2793884277344, "logps/rejected": -319.5242919921875, "loss": 0.5735, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.28460922837257385, "rewards/margins": 0.4524053633213043, "rewards/rejected": -0.16779613494873047, "step": 80 }, { "epoch": 0.09, "learning_rate": 9.278350515463918e-05, "logits/chosen": -4.246251583099365, "logits/rejected": -4.106213569641113, "logps/chosen": -402.74224853515625, "logps/rejected": -323.2504577636719, "loss": 0.6033, "rewards/accuracies": 0.660937488079071, "rewards/chosen": 0.24649712443351746, "rewards/margins": 0.40387439727783203, "rewards/rejected": -0.1573772430419922, "step": 90 }, { "epoch": 0.1, "learning_rate": 9.965556831228473e-05, "logits/chosen": -4.269595146179199, "logits/rejected": -4.139230251312256, "logps/chosen": -398.0285949707031, "logps/rejected": -309.89703369140625, "loss": 0.592, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": 0.32199668884277344, "rewards/margins": 0.42783433198928833, "rewards/rejected": -0.10583765804767609, "step": 100 }, { "epoch": 0.11, "learning_rate": 9.850746268656717e-05, "logits/chosen": -4.21318244934082, "logits/rejected": -4.068546295166016, "logps/chosen": -417.46673583984375, "logps/rejected": -304.0533752441406, "loss": 0.5726, "rewards/accuracies": 0.692187488079071, "rewards/chosen": 0.33084017038345337, "rewards/margins": 0.5065333843231201, "rewards/rejected": -0.17569322884082794, "step": 110 }, { "epoch": 0.12, "learning_rate": 9.73593570608496e-05, "logits/chosen": -4.242104530334473, "logits/rejected": -4.1272687911987305, "logps/chosen": -402.74591064453125, "logps/rejected": -324.69183349609375, "loss": 0.5899, "rewards/accuracies": 0.6703125238418579, "rewards/chosen": 0.30437177419662476, "rewards/margins": 0.45763349533081055, "rewards/rejected": -0.1532617062330246, "step": 120 }, { "epoch": 0.13, "learning_rate": 9.621125143513204e-05, "logits/chosen": -4.22554874420166, "logits/rejected": -4.083498001098633, "logps/chosen": -398.24945068359375, "logps/rejected": -300.625, "loss": 0.573, "rewards/accuracies": 0.6875, "rewards/chosen": 0.33012667298316956, "rewards/margins": 0.5282346606254578, "rewards/rejected": -0.1981080025434494, "step": 130 }, { "epoch": 0.14, "learning_rate": 9.506314580941446e-05, "logits/chosen": -4.226154804229736, "logits/rejected": -4.101135730743408, "logps/chosen": -409.64361572265625, "logps/rejected": -306.75164794921875, "loss": 0.5902, "rewards/accuracies": 0.651562511920929, "rewards/chosen": 0.29234111309051514, "rewards/margins": 0.4638133645057678, "rewards/rejected": -0.1714722216129303, "step": 140 }, { "epoch": 0.15, "learning_rate": 9.39150401836969e-05, "logits/chosen": -4.1987833976745605, "logits/rejected": -4.084831237792969, "logps/chosen": -379.6610107421875, "logps/rejected": -302.0074462890625, "loss": 0.5826, "rewards/accuracies": 0.684374988079071, "rewards/chosen": 0.18739664554595947, "rewards/margins": 0.4692758619785309, "rewards/rejected": -0.2818792164325714, "step": 150 }, { "epoch": 0.17, "learning_rate": 9.276693455797933e-05, "logits/chosen": -4.202162265777588, "logits/rejected": -4.0508928298950195, "logps/chosen": -397.18414306640625, "logps/rejected": -314.05401611328125, "loss": 0.5614, "rewards/accuracies": 0.71875, "rewards/chosen": 0.22261695563793182, "rewards/margins": 0.5240545868873596, "rewards/rejected": -0.3014376163482666, "step": 160 }, { "epoch": 0.18, "learning_rate": 9.161882893226177e-05, "logits/chosen": -4.191584587097168, "logits/rejected": -4.02324914932251, "logps/chosen": -417.01611328125, "logps/rejected": -317.5060119628906, "loss": 0.5867, "rewards/accuracies": 0.684374988079071, "rewards/chosen": 0.23040691018104553, "rewards/margins": 0.5268203020095825, "rewards/rejected": -0.296413391828537, "step": 170 }, { "epoch": 0.19, "learning_rate": 9.047072330654421e-05, "logits/chosen": -4.206202507019043, "logits/rejected": -4.087266445159912, "logps/chosen": -410.1561584472656, "logps/rejected": -326.1917724609375, "loss": 0.6008, "rewards/accuracies": 0.6703125238418579, "rewards/chosen": 0.31394436955451965, "rewards/margins": 0.46465569734573364, "rewards/rejected": -0.15071135759353638, "step": 180 }, { "epoch": 0.2, "learning_rate": 8.932261768082664e-05, "logits/chosen": -4.241537094116211, "logits/rejected": -4.068553924560547, "logps/chosen": -414.43792724609375, "logps/rejected": -317.09149169921875, "loss": 0.5623, "rewards/accuracies": 0.7109375, "rewards/chosen": 0.2688312828540802, "rewards/margins": 0.5498431921005249, "rewards/rejected": -0.2810118794441223, "step": 190 }, { "epoch": 0.21, "learning_rate": 8.817451205510908e-05, "logits/chosen": -4.214895725250244, "logits/rejected": -4.07419490814209, "logps/chosen": -378.5306701660156, "logps/rejected": -304.207763671875, "loss": 0.5837, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": 0.2962898313999176, "rewards/margins": 0.5190107226371765, "rewards/rejected": -0.2227208912372589, "step": 200 }, { "epoch": 0.22, "learning_rate": 8.70264064293915e-05, "logits/chosen": -4.193976402282715, "logits/rejected": -4.08192777633667, "logps/chosen": -403.6576843261719, "logps/rejected": -333.78564453125, "loss": 0.5837, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.34363043308258057, "rewards/margins": 0.5302781462669373, "rewards/rejected": -0.18664774298667908, "step": 210 }, { "epoch": 0.23, "learning_rate": 8.587830080367394e-05, "logits/chosen": -4.192063331604004, "logits/rejected": -4.060949325561523, "logps/chosen": -394.6471252441406, "logps/rejected": -313.9928894042969, "loss": 0.5889, "rewards/accuracies": 0.692187488079071, "rewards/chosen": 0.2043389081954956, "rewards/margins": 0.47708654403686523, "rewards/rejected": -0.27274760603904724, "step": 220 }, { "epoch": 0.24, "learning_rate": 8.473019517795637e-05, "logits/chosen": -4.191307544708252, "logits/rejected": -4.050302505493164, "logps/chosen": -406.06610107421875, "logps/rejected": -323.23309326171875, "loss": 0.5677, "rewards/accuracies": 0.698437511920929, "rewards/chosen": 0.23044082522392273, "rewards/margins": 0.5557659864425659, "rewards/rejected": -0.3253251612186432, "step": 230 }, { "epoch": 0.25, "learning_rate": 8.358208955223881e-05, "logits/chosen": -4.186672210693359, "logits/rejected": -4.0680832862854, "logps/chosen": -407.0450744628906, "logps/rejected": -331.31365966796875, "loss": 0.5673, "rewards/accuracies": 0.690625011920929, "rewards/chosen": 0.2235272377729416, "rewards/margins": 0.5514543652534485, "rewards/rejected": -0.3279270529747009, "step": 240 }, { "epoch": 0.26, "learning_rate": 8.243398392652125e-05, "logits/chosen": -4.196033000946045, "logits/rejected": -4.087727069854736, "logps/chosen": -403.15374755859375, "logps/rejected": -324.0137939453125, "loss": 0.595, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.03976796939969063, "rewards/margins": 0.4899526536464691, "rewards/rejected": -0.45018473267555237, "step": 250 }, { "epoch": 0.27, "learning_rate": 8.128587830080367e-05, "logits/chosen": -4.203690528869629, "logits/rejected": -4.059721946716309, "logps/chosen": -378.96044921875, "logps/rejected": -294.76849365234375, "loss": 0.5576, "rewards/accuracies": 0.721875011920929, "rewards/chosen": 0.01410858053714037, "rewards/margins": 0.5529131293296814, "rewards/rejected": -0.5388045310974121, "step": 260 }, { "epoch": 0.28, "learning_rate": 8.013777267508611e-05, "logits/chosen": -4.201757431030273, "logits/rejected": -4.062942981719971, "logps/chosen": -422.6997985839844, "logps/rejected": -323.6975402832031, "loss": 0.5456, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.1681019812822342, "rewards/margins": 0.6470087170600891, "rewards/rejected": -0.4789067208766937, "step": 270 }, { "epoch": 0.29, "learning_rate": 7.898966704936854e-05, "logits/chosen": -4.207779407501221, "logits/rejected": -4.054646968841553, "logps/chosen": -406.03106689453125, "logps/rejected": -310.811279296875, "loss": 0.561, "rewards/accuracies": 0.721875011920929, "rewards/chosen": 0.21866166591644287, "rewards/margins": 0.6091804504394531, "rewards/rejected": -0.39051881432533264, "step": 280 }, { "epoch": 0.3, "learning_rate": 7.784156142365098e-05, "logits/chosen": -4.202643394470215, "logits/rejected": -4.071869373321533, "logps/chosen": -382.42974853515625, "logps/rejected": -300.3152770996094, "loss": 0.5772, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": 0.147455632686615, "rewards/margins": 0.5467114448547363, "rewards/rejected": -0.39925578236579895, "step": 290 }, { "epoch": 0.31, "learning_rate": 7.66934557979334e-05, "logits/chosen": -4.202780723571777, "logits/rejected": -4.08065938949585, "logps/chosen": -393.5049133300781, "logps/rejected": -311.8150634765625, "loss": 0.5805, "rewards/accuracies": 0.707812488079071, "rewards/chosen": 0.21916493773460388, "rewards/margins": 0.5547321438789368, "rewards/rejected": -0.3355671763420105, "step": 300 }, { "epoch": 0.32, "learning_rate": 7.554535017221585e-05, "logits/chosen": -4.187916278839111, "logits/rejected": -4.059911727905273, "logps/chosen": -402.950439453125, "logps/rejected": -320.72747802734375, "loss": 0.5574, "rewards/accuracies": 0.723437488079071, "rewards/chosen": 0.2509889304637909, "rewards/margins": 0.6315642595291138, "rewards/rejected": -0.3805752396583557, "step": 310 }, { "epoch": 0.33, "learning_rate": 7.439724454649829e-05, "logits/chosen": -4.20109748840332, "logits/rejected": -4.057183265686035, "logps/chosen": -405.6755065917969, "logps/rejected": -305.56280517578125, "loss": 0.5622, "rewards/accuracies": 0.6859375238418579, "rewards/chosen": 0.10838484764099121, "rewards/margins": 0.5731135606765747, "rewards/rejected": -0.4647287428379059, "step": 320 }, { "epoch": 0.34, "learning_rate": 7.324913892078071e-05, "logits/chosen": -4.224749565124512, "logits/rejected": -4.100439071655273, "logps/chosen": -404.3746643066406, "logps/rejected": -325.85626220703125, "loss": 0.5335, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": 0.1826043277978897, "rewards/margins": 0.6973450183868408, "rewards/rejected": -0.5147407054901123, "step": 330 }, { "epoch": 0.35, "learning_rate": 7.210103329506315e-05, "logits/chosen": -4.193475246429443, "logits/rejected": -4.071505069732666, "logps/chosen": -382.69415283203125, "logps/rejected": -313.68255615234375, "loss": 0.56, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": 0.004598347935825586, "rewards/margins": 0.6110485792160034, "rewards/rejected": -0.6064502000808716, "step": 340 }, { "epoch": 0.36, "learning_rate": 7.095292766934558e-05, "logits/chosen": -4.205704212188721, "logits/rejected": -4.08672571182251, "logps/chosen": -396.59588623046875, "logps/rejected": -314.82794189453125, "loss": 0.5673, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": 0.03806200250983238, "rewards/margins": 0.5792189836502075, "rewards/rejected": -0.541157066822052, "step": 350 }, { "epoch": 0.37, "learning_rate": 6.980482204362802e-05, "logits/chosen": -4.210749626159668, "logits/rejected": -4.0535149574279785, "logps/chosen": -404.23590087890625, "logps/rejected": -318.4687805175781, "loss": 0.5458, "rewards/accuracies": 0.7109375, "rewards/chosen": 0.10304062068462372, "rewards/margins": 0.6985061764717102, "rewards/rejected": -0.5954655408859253, "step": 360 }, { "epoch": 0.38, "learning_rate": 6.865671641791044e-05, "logits/chosen": -4.188273906707764, "logits/rejected": -4.062605381011963, "logps/chosen": -398.55841064453125, "logps/rejected": -332.02703857421875, "loss": 0.5408, "rewards/accuracies": 0.715624988079071, "rewards/chosen": 0.05233822017908096, "rewards/margins": 0.6659021377563477, "rewards/rejected": -0.6135639548301697, "step": 370 }, { "epoch": 0.39, "learning_rate": 6.750861079219288e-05, "logits/chosen": -4.17733097076416, "logits/rejected": -4.063222885131836, "logps/chosen": -393.2876281738281, "logps/rejected": -324.6680908203125, "loss": 0.5675, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.021370118483901024, "rewards/margins": 0.6500793695449829, "rewards/rejected": -0.6714495420455933, "step": 380 }, { "epoch": 0.4, "learning_rate": 6.636050516647532e-05, "logits/chosen": -4.192882537841797, "logits/rejected": -4.0380754470825195, "logps/chosen": -396.90423583984375, "logps/rejected": -310.64691162109375, "loss": 0.5382, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.042632658034563065, "rewards/margins": 0.6727082133293152, "rewards/rejected": -0.6300755739212036, "step": 390 }, { "epoch": 0.41, "learning_rate": 6.521239954075775e-05, "logits/chosen": -4.2160844802856445, "logits/rejected": -4.062317848205566, "logps/chosen": -411.5877380371094, "logps/rejected": -319.33123779296875, "loss": 0.5591, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.09205110371112823, "rewards/margins": 0.6733552813529968, "rewards/rejected": -0.5813042521476746, "step": 400 }, { "epoch": 0.42, "learning_rate": 6.406429391504019e-05, "logits/chosen": -4.208608150482178, "logits/rejected": -4.115554332733154, "logps/chosen": -398.64483642578125, "logps/rejected": -330.0683288574219, "loss": 0.5743, "rewards/accuracies": 0.703125, "rewards/chosen": 0.07452201843261719, "rewards/margins": 0.6016975045204163, "rewards/rejected": -0.5271755456924438, "step": 410 }, { "epoch": 0.43, "learning_rate": 6.291618828932262e-05, "logits/chosen": -4.216838836669922, "logits/rejected": -4.073526859283447, "logps/chosen": -416.29229736328125, "logps/rejected": -323.9781799316406, "loss": 0.539, "rewards/accuracies": 0.734375, "rewards/chosen": 0.2136647254228592, "rewards/margins": 0.7078708410263062, "rewards/rejected": -0.49420619010925293, "step": 420 }, { "epoch": 0.44, "learning_rate": 6.176808266360506e-05, "logits/chosen": -4.1789093017578125, "logits/rejected": -4.057516574859619, "logps/chosen": -383.4624938964844, "logps/rejected": -300.4764404296875, "loss": 0.5447, "rewards/accuracies": 0.721875011920929, "rewards/chosen": 0.08707320690155029, "rewards/margins": 0.6318439841270447, "rewards/rejected": -0.5447708368301392, "step": 430 }, { "epoch": 0.45, "learning_rate": 6.061997703788749e-05, "logits/chosen": -4.218086242675781, "logits/rejected": -4.057248592376709, "logps/chosen": -413.1617736816406, "logps/rejected": -320.5242919921875, "loss": 0.5656, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": 0.1771887242794037, "rewards/margins": 0.6332125067710876, "rewards/rejected": -0.45602384209632874, "step": 440 }, { "epoch": 0.46, "learning_rate": 5.947187141216992e-05, "logits/chosen": -4.181551933288574, "logits/rejected": -4.086193561553955, "logps/chosen": -399.1908264160156, "logps/rejected": -331.75390625, "loss": 0.5686, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": 0.1644556224346161, "rewards/margins": 0.6215040683746338, "rewards/rejected": -0.4570484161376953, "step": 450 }, { "epoch": 0.48, "learning_rate": 5.8323765786452354e-05, "logits/chosen": -4.1721296310424805, "logits/rejected": -4.040920257568359, "logps/chosen": -392.23065185546875, "logps/rejected": -312.1791076660156, "loss": 0.5546, "rewards/accuracies": 0.6953125, "rewards/chosen": 0.24298326671123505, "rewards/margins": 0.6002477407455444, "rewards/rejected": -0.3572644293308258, "step": 460 }, { "epoch": 0.49, "learning_rate": 5.717566016073479e-05, "logits/chosen": -4.190958023071289, "logits/rejected": -4.052657127380371, "logps/chosen": -417.098388671875, "logps/rejected": -327.164306640625, "loss": 0.5625, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.2720445990562439, "rewards/margins": 0.628037691116333, "rewards/rejected": -0.3559931218624115, "step": 470 }, { "epoch": 0.5, "learning_rate": 5.602755453501722e-05, "logits/chosen": -4.198035717010498, "logits/rejected": -4.058812141418457, "logps/chosen": -397.5671691894531, "logps/rejected": -318.50677490234375, "loss": 0.5277, "rewards/accuracies": 0.75, "rewards/chosen": 0.26485422253608704, "rewards/margins": 0.7160789370536804, "rewards/rejected": -0.4512247145175934, "step": 480 }, { "epoch": 0.51, "learning_rate": 5.487944890929966e-05, "logits/chosen": -4.172371864318848, "logits/rejected": -4.0723066329956055, "logps/chosen": -395.58404541015625, "logps/rejected": -312.6333312988281, "loss": 0.5354, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": 0.2391396313905716, "rewards/margins": 0.6927992105484009, "rewards/rejected": -0.4536595344543457, "step": 490 }, { "epoch": 0.52, "learning_rate": 5.373134328358209e-05, "logits/chosen": -4.170714855194092, "logits/rejected": -4.034462928771973, "logps/chosen": -409.53192138671875, "logps/rejected": -312.90863037109375, "loss": 0.5662, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.27997761964797974, "rewards/margins": 0.6217042803764343, "rewards/rejected": -0.341726690530777, "step": 500 }, { "epoch": 0.53, "learning_rate": 5.2583237657864526e-05, "logits/chosen": -4.204545021057129, "logits/rejected": -4.047720909118652, "logps/chosen": -407.4417419433594, "logps/rejected": -303.8612060546875, "loss": 0.5176, "rewards/accuracies": 0.739062488079071, "rewards/chosen": 0.2186679095029831, "rewards/margins": 0.7266091108322144, "rewards/rejected": -0.5079413652420044, "step": 510 }, { "epoch": 0.54, "learning_rate": 5.143513203214696e-05, "logits/chosen": -4.181861877441406, "logits/rejected": -4.042303085327148, "logps/chosen": -406.1308288574219, "logps/rejected": -324.74261474609375, "loss": 0.5589, "rewards/accuracies": 0.71875, "rewards/chosen": 0.21417653560638428, "rewards/margins": 0.7197046279907227, "rewards/rejected": -0.5055280923843384, "step": 520 }, { "epoch": 0.55, "learning_rate": 5.028702640642939e-05, "logits/chosen": -4.163634300231934, "logits/rejected": -4.041086673736572, "logps/chosen": -411.84881591796875, "logps/rejected": -328.6117248535156, "loss": 0.5375, "rewards/accuracies": 0.7265625, "rewards/chosen": 0.27732527256011963, "rewards/margins": 0.7520377039909363, "rewards/rejected": -0.47471246123313904, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.9138920780711825e-05, "logits/chosen": -4.185253143310547, "logits/rejected": -4.089067459106445, "logps/chosen": -394.6053771972656, "logps/rejected": -318.41400146484375, "loss": 0.5666, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.11947999894618988, "rewards/margins": 0.6376982927322388, "rewards/rejected": -0.5182183384895325, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.7990815154994265e-05, "logits/chosen": -4.213289737701416, "logits/rejected": -4.085998058319092, "logps/chosen": -399.3802795410156, "logps/rejected": -328.15478515625, "loss": 0.5758, "rewards/accuracies": 0.698437511920929, "rewards/chosen": 0.1032366007566452, "rewards/margins": 0.5791338682174683, "rewards/rejected": -0.4758972227573395, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.68427095292767e-05, "logits/chosen": -4.1805009841918945, "logits/rejected": -4.031790733337402, "logps/chosen": -409.77362060546875, "logps/rejected": -324.82574462890625, "loss": 0.5384, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.20845262706279755, "rewards/margins": 0.6964353322982788, "rewards/rejected": -0.48798269033432007, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.569460390355913e-05, "logits/chosen": -4.235301971435547, "logits/rejected": -4.049469947814941, "logps/chosen": -409.75299072265625, "logps/rejected": -315.889404296875, "loss": 0.5272, "rewards/accuracies": 0.739062488079071, "rewards/chosen": 0.2591592073440552, "rewards/margins": 0.75799959897995, "rewards/rejected": -0.49884042143821716, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.4546498277841564e-05, "logits/chosen": -4.197813987731934, "logits/rejected": -4.053323745727539, "logps/chosen": -409.5298767089844, "logps/rejected": -326.00872802734375, "loss": 0.5497, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": 0.23509593307971954, "rewards/margins": 0.6796003580093384, "rewards/rejected": -0.44450441002845764, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.3398392652124e-05, "logits/chosen": -4.206587314605713, "logits/rejected": -4.077775955200195, "logps/chosen": -406.1709289550781, "logps/rejected": -316.99945068359375, "loss": 0.5469, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": 0.2390655279159546, "rewards/margins": 0.6781237721443176, "rewards/rejected": -0.4390583038330078, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.225028702640643e-05, "logits/chosen": -4.212055206298828, "logits/rejected": -4.080319404602051, "logps/chosen": -401.8226013183594, "logps/rejected": -326.7524719238281, "loss": 0.5434, "rewards/accuracies": 0.715624988079071, "rewards/chosen": 0.24123027920722961, "rewards/margins": 0.6904546022415161, "rewards/rejected": -0.4492243230342865, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.1102181400688863e-05, "logits/chosen": -4.197389125823975, "logits/rejected": -4.070887565612793, "logps/chosen": -424.982421875, "logps/rejected": -338.803955078125, "loss": 0.5327, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": 0.28303179144859314, "rewards/margins": 0.7425665855407715, "rewards/rejected": -0.45953473448753357, "step": 610 }, { "epoch": 0.64, "learning_rate": 3.99540757749713e-05, "logits/chosen": -4.205746650695801, "logits/rejected": -4.061286926269531, "logps/chosen": -395.08233642578125, "logps/rejected": -304.86712646484375, "loss": 0.5524, "rewards/accuracies": 0.6953125, "rewards/chosen": 0.20203503966331482, "rewards/margins": 0.7106438875198364, "rewards/rejected": -0.5086088180541992, "step": 620 }, { "epoch": 0.65, "learning_rate": 3.8805970149253736e-05, "logits/chosen": -4.208118915557861, "logits/rejected": -4.079540729522705, "logps/chosen": -406.8816833496094, "logps/rejected": -315.55712890625, "loss": 0.5474, "rewards/accuracies": 0.7265625, "rewards/chosen": 0.18478946387767792, "rewards/margins": 0.6608500480651855, "rewards/rejected": -0.47606056928634644, "step": 630 }, { "epoch": 0.66, "learning_rate": 3.765786452353617e-05, "logits/chosen": -4.179257392883301, "logits/rejected": -4.0610671043396, "logps/chosen": -412.61962890625, "logps/rejected": -335.36517333984375, "loss": 0.5369, "rewards/accuracies": 0.71875, "rewards/chosen": 0.21612036228179932, "rewards/margins": 0.7052344083786011, "rewards/rejected": -0.48911404609680176, "step": 640 }, { "epoch": 0.67, "learning_rate": 3.65097588978186e-05, "logits/chosen": -4.213753700256348, "logits/rejected": -4.085109710693359, "logps/chosen": -416.76959228515625, "logps/rejected": -323.77288818359375, "loss": 0.5625, "rewards/accuracies": 0.6859375238418579, "rewards/chosen": 0.23179855942726135, "rewards/margins": 0.6918643712997437, "rewards/rejected": -0.4600658416748047, "step": 650 }, { "epoch": 0.68, "learning_rate": 3.5361653272101035e-05, "logits/chosen": -4.171154499053955, "logits/rejected": -4.042459487915039, "logps/chosen": -419.46881103515625, "logps/rejected": -331.4931945800781, "loss": 0.5522, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": 0.25308963656425476, "rewards/margins": 0.6450797319412231, "rewards/rejected": -0.39199012517929077, "step": 660 }, { "epoch": 0.69, "learning_rate": 3.421354764638347e-05, "logits/chosen": -4.187798500061035, "logits/rejected": -4.070495128631592, "logps/chosen": -400.662353515625, "logps/rejected": -315.8260498046875, "loss": 0.5445, "rewards/accuracies": 0.723437488079071, "rewards/chosen": 0.23092904686927795, "rewards/margins": 0.692219614982605, "rewards/rejected": -0.46129053831100464, "step": 670 }, { "epoch": 0.7, "learning_rate": 3.30654420206659e-05, "logits/chosen": -4.20836067199707, "logits/rejected": -4.072371482849121, "logps/chosen": -400.0563049316406, "logps/rejected": -323.396728515625, "loss": 0.5504, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.22010056674480438, "rewards/margins": 0.6868074536323547, "rewards/rejected": -0.46670693159103394, "step": 680 }, { "epoch": 0.71, "learning_rate": 3.191733639494834e-05, "logits/chosen": -4.20003604888916, "logits/rejected": -4.075405597686768, "logps/chosen": -403.5914611816406, "logps/rejected": -322.0238037109375, "loss": 0.5532, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": 0.22083628177642822, "rewards/margins": 0.645309329032898, "rewards/rejected": -0.4244731068611145, "step": 690 }, { "epoch": 0.72, "learning_rate": 3.0769230769230774e-05, "logits/chosen": -4.246481895446777, "logits/rejected": -4.108793258666992, "logps/chosen": -413.4231872558594, "logps/rejected": -317.5379638671875, "loss": 0.5305, "rewards/accuracies": 0.7265625, "rewards/chosen": 0.39659667015075684, "rewards/margins": 0.7596049308776855, "rewards/rejected": -0.3630082607269287, "step": 700 }, { "epoch": 0.73, "learning_rate": 2.9621125143513207e-05, "logits/chosen": -4.222532272338867, "logits/rejected": -4.083676815032959, "logps/chosen": -419.8621520996094, "logps/rejected": -334.4248046875, "loss": 0.5573, "rewards/accuracies": 0.715624988079071, "rewards/chosen": 0.3562389612197876, "rewards/margins": 0.6699641346931458, "rewards/rejected": -0.31372514367103577, "step": 710 }, { "epoch": 0.74, "learning_rate": 2.847301951779564e-05, "logits/chosen": -4.215832710266113, "logits/rejected": -4.109940528869629, "logps/chosen": -377.0066833496094, "logps/rejected": -303.85125732421875, "loss": 0.5694, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": 0.25494620203971863, "rewards/margins": 0.6031069159507751, "rewards/rejected": -0.3481607139110565, "step": 720 }, { "epoch": 0.75, "learning_rate": 2.7324913892078073e-05, "logits/chosen": -4.17380428314209, "logits/rejected": -4.056889533996582, "logps/chosen": -372.79669189453125, "logps/rejected": -302.70733642578125, "loss": 0.5638, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.2314002513885498, "rewards/margins": 0.6297930479049683, "rewards/rejected": -0.3983927369117737, "step": 730 }, { "epoch": 0.76, "learning_rate": 2.617680826636051e-05, "logits/chosen": -4.194275856018066, "logits/rejected": -4.059576988220215, "logps/chosen": -375.8843078613281, "logps/rejected": -292.58154296875, "loss": 0.5495, "rewards/accuracies": 0.723437488079071, "rewards/chosen": 0.2551327049732208, "rewards/margins": 0.6916426420211792, "rewards/rejected": -0.436509907245636, "step": 740 }, { "epoch": 0.77, "learning_rate": 2.5028702640642943e-05, "logits/chosen": -4.205421447753906, "logits/rejected": -4.062956809997559, "logps/chosen": -400.41650390625, "logps/rejected": -315.60467529296875, "loss": 0.5576, "rewards/accuracies": 0.721875011920929, "rewards/chosen": 0.21110089123249054, "rewards/margins": 0.6705206632614136, "rewards/rejected": -0.45941978693008423, "step": 750 }, { "epoch": 0.78, "learning_rate": 2.3880597014925373e-05, "logits/chosen": -4.213685512542725, "logits/rejected": -4.113875865936279, "logps/chosen": -405.49822998046875, "logps/rejected": -323.603759765625, "loss": 0.546, "rewards/accuracies": 0.7265625, "rewards/chosen": 0.27266332507133484, "rewards/margins": 0.6973702311515808, "rewards/rejected": -0.4247068464756012, "step": 760 }, { "epoch": 0.8, "learning_rate": 2.273249138920781e-05, "logits/chosen": -4.2165117263793945, "logits/rejected": -4.080572605133057, "logps/chosen": -399.1180419921875, "logps/rejected": -311.00799560546875, "loss": 0.5538, "rewards/accuracies": 0.6953125, "rewards/chosen": 0.27140167355537415, "rewards/margins": 0.6971061825752258, "rewards/rejected": -0.4257044792175293, "step": 770 }, { "epoch": 0.81, "learning_rate": 2.1584385763490242e-05, "logits/chosen": -4.225666046142578, "logits/rejected": -4.073777198791504, "logps/chosen": -396.8205871582031, "logps/rejected": -308.5452575683594, "loss": 0.5626, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.26160120964050293, "rewards/margins": 0.6686427593231201, "rewards/rejected": -0.40704160928726196, "step": 780 }, { "epoch": 0.82, "learning_rate": 2.0436280137772675e-05, "logits/chosen": -4.177059650421143, "logits/rejected": -4.059685230255127, "logps/chosen": -410.745361328125, "logps/rejected": -332.66864013671875, "loss": 0.5504, "rewards/accuracies": 0.721875011920929, "rewards/chosen": 0.26947110891342163, "rewards/margins": 0.6891570091247559, "rewards/rejected": -0.4196859300136566, "step": 790 }, { "epoch": 0.83, "learning_rate": 1.928817451205511e-05, "logits/chosen": -4.1995649337768555, "logits/rejected": -4.093672275543213, "logps/chosen": -402.1859436035156, "logps/rejected": -334.12762451171875, "loss": 0.5822, "rewards/accuracies": 0.698437511920929, "rewards/chosen": 0.22209081053733826, "rewards/margins": 0.5844244360923767, "rewards/rejected": -0.3623336851596832, "step": 800 }, { "epoch": 0.84, "learning_rate": 1.8140068886337545e-05, "logits/chosen": -4.231449604034424, "logits/rejected": -4.087018013000488, "logps/chosen": -399.5769958496094, "logps/rejected": -315.0458984375, "loss": 0.5724, "rewards/accuracies": 0.6890624761581421, "rewards/chosen": 0.2985903322696686, "rewards/margins": 0.6237133741378784, "rewards/rejected": -0.3251231014728546, "step": 810 }, { "epoch": 0.85, "learning_rate": 1.6991963260619978e-05, "logits/chosen": -4.21367883682251, "logits/rejected": -4.080574989318848, "logps/chosen": -406.123291015625, "logps/rejected": -325.72576904296875, "loss": 0.5468, "rewards/accuracies": 0.71875, "rewards/chosen": 0.2745881676673889, "rewards/margins": 0.6910631060600281, "rewards/rejected": -0.41647496819496155, "step": 820 }, { "epoch": 0.86, "learning_rate": 1.584385763490241e-05, "logits/chosen": -4.216300010681152, "logits/rejected": -4.086144924163818, "logps/chosen": -396.7312927246094, "logps/rejected": -320.1023864746094, "loss": 0.5651, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.265908807516098, "rewards/margins": 0.6358457207679749, "rewards/rejected": -0.3699369430541992, "step": 830 }, { "epoch": 0.87, "learning_rate": 1.4695752009184845e-05, "logits/chosen": -4.200199127197266, "logits/rejected": -4.1011481285095215, "logps/chosen": -381.8200988769531, "logps/rejected": -320.4044494628906, "loss": 0.5839, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": 0.23138895630836487, "rewards/margins": 0.5585998296737671, "rewards/rejected": -0.327210932970047, "step": 840 }, { "epoch": 0.88, "learning_rate": 1.354764638346728e-05, "logits/chosen": -4.2150444984436035, "logits/rejected": -4.042110919952393, "logps/chosen": -406.70062255859375, "logps/rejected": -323.6236267089844, "loss": 0.5479, "rewards/accuracies": 0.703125, "rewards/chosen": 0.27608898282051086, "rewards/margins": 0.6516892313957214, "rewards/rejected": -0.37560024857521057, "step": 850 }, { "epoch": 0.89, "learning_rate": 1.2399540757749715e-05, "logits/chosen": -4.210223197937012, "logits/rejected": -4.12299108505249, "logps/chosen": -392.59161376953125, "logps/rejected": -334.56085205078125, "loss": 0.5665, "rewards/accuracies": 0.707812488079071, "rewards/chosen": 0.2766234278678894, "rewards/margins": 0.6172455549240112, "rewards/rejected": -0.340622216463089, "step": 860 }, { "epoch": 0.9, "learning_rate": 1.1251435132032148e-05, "logits/chosen": -4.190002918243408, "logits/rejected": -4.067898273468018, "logps/chosen": -399.5977478027344, "logps/rejected": -325.31439208984375, "loss": 0.5461, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": 0.2887663245201111, "rewards/margins": 0.6452386975288391, "rewards/rejected": -0.35647234320640564, "step": 870 }, { "epoch": 0.91, "learning_rate": 1.010332950631458e-05, "logits/chosen": -4.196687698364258, "logits/rejected": -4.076775550842285, "logps/chosen": -385.01556396484375, "logps/rejected": -305.6297912597656, "loss": 0.5525, "rewards/accuracies": 0.723437488079071, "rewards/chosen": 0.2339615821838379, "rewards/margins": 0.6333780884742737, "rewards/rejected": -0.3994165360927582, "step": 880 }, { "epoch": 0.92, "learning_rate": 8.955223880597016e-06, "logits/chosen": -4.185070037841797, "logits/rejected": -4.042351722717285, "logps/chosen": -421.82257080078125, "logps/rejected": -322.86004638671875, "loss": 0.5357, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": 0.3234316110610962, "rewards/margins": 0.7073397040367126, "rewards/rejected": -0.38390809297561646, "step": 890 }, { "epoch": 0.93, "learning_rate": 7.807118254879449e-06, "logits/chosen": -4.18631649017334, "logits/rejected": -4.07721471786499, "logps/chosen": -407.052734375, "logps/rejected": -331.8683776855469, "loss": 0.5671, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": 0.2453383207321167, "rewards/margins": 0.6055070757865906, "rewards/rejected": -0.3601687550544739, "step": 900 }, { "epoch": 0.94, "learning_rate": 6.659012629161883e-06, "logits/chosen": -4.177254676818848, "logits/rejected": -4.05480432510376, "logps/chosen": -402.8782653808594, "logps/rejected": -322.88629150390625, "loss": 0.5567, "rewards/accuracies": 0.707812488079071, "rewards/chosen": 0.2786063849925995, "rewards/margins": 0.6515501737594604, "rewards/rejected": -0.3729437589645386, "step": 910 }, { "epoch": 0.95, "learning_rate": 5.510907003444317e-06, "logits/chosen": -4.227939605712891, "logits/rejected": -4.0849127769470215, "logps/chosen": -411.8175354003906, "logps/rejected": -328.0929260253906, "loss": 0.5228, "rewards/accuracies": 0.745312511920929, "rewards/chosen": 0.3645602762699127, "rewards/margins": 0.7300227880477905, "rewards/rejected": -0.3654625117778778, "step": 920 }, { "epoch": 0.96, "learning_rate": 4.362801377726751e-06, "logits/chosen": -4.217653751373291, "logits/rejected": -4.041480541229248, "logps/chosen": -412.8955078125, "logps/rejected": -309.83575439453125, "loss": 0.5456, "rewards/accuracies": 0.707812488079071, "rewards/chosen": 0.280931293964386, "rewards/margins": 0.6941055655479431, "rewards/rejected": -0.4131743013858795, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.214695752009185e-06, "logits/chosen": -4.193150520324707, "logits/rejected": -4.098111152648926, "logps/chosen": -404.9234924316406, "logps/rejected": -331.74957275390625, "loss": 0.5386, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": 0.2807141840457916, "rewards/margins": 0.7093294262886047, "rewards/rejected": -0.4286152720451355, "step": 940 }, { "epoch": 0.98, "learning_rate": 2.066590126291619e-06, "logits/chosen": -4.196070671081543, "logits/rejected": -4.099771499633789, "logps/chosen": -398.6044006347656, "logps/rejected": -325.76800537109375, "loss": 0.5634, "rewards/accuracies": 0.71875, "rewards/chosen": 0.23345312476158142, "rewards/margins": 0.5998488664627075, "rewards/rejected": -0.3663956820964813, "step": 950 }, { "epoch": 0.99, "learning_rate": 9.184845005740528e-07, "logits/chosen": -4.210867881774902, "logits/rejected": -4.075991630554199, "logps/chosen": -426.70892333984375, "logps/rejected": -334.88592529296875, "loss": 0.5681, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.2895241975784302, "rewards/margins": 0.6067122220993042, "rewards/rejected": -0.3171880543231964, "step": 960 }, { "epoch": 1.0, "eval_logits/chosen": -4.129703998565674, "eval_logits/rejected": -4.016438007354736, "eval_logps/chosen": -399.9466857910156, "eval_logps/rejected": -319.7874450683594, "eval_loss": 0.5375946164131165, "eval_rewards/accuracies": 0.7229999899864197, "eval_rewards/chosen": 0.2708839476108551, "eval_rewards/margins": 0.675108790397644, "eval_rewards/rejected": -0.40422478318214417, "eval_runtime": 768.3345, "eval_samples_per_second": 2.603, "eval_steps_per_second": 0.651, "step": 968 }, { "epoch": 1.0, "step": 968, "total_flos": 0.0, "train_loss": 0.5658997891486184, "train_runtime": 38098.3192, "train_samples_per_second": 1.626, "train_steps_per_second": 0.025 } ], "logging_steps": 10, "max_steps": 968, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }