Instructions to use JW17/Q25-3B-It-BTRM-math with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use JW17/Q25-3B-It-BTRM-math with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("JW17/Q25-3B-It-BTRM-math", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.15060240963855423, | |
| "eval_steps": 500, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1461.0, | |
| "completions/mean_length": 539.939453125, | |
| "completions/mean_terminated_length": 534.0687866210938, | |
| "completions/min_length": 150.0, | |
| "completions/min_terminated_length": 150.0, | |
| "epoch": 0.0007530120481927711, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24425463378429413, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0483, | |
| "num_tokens": 322497.0, | |
| "reward": 18.535240173339844, | |
| "reward_std": 4.5626091957092285, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.535240173339844, | |
| "rewards/skywork_reward/std": 7.240255355834961, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1313.0, | |
| "completions/mean_length": 539.921875, | |
| "completions/mean_terminated_length": 537.9725952148438, | |
| "completions/min_length": 162.0, | |
| "completions/min_terminated_length": 162.0, | |
| "epoch": 0.0015060240963855422, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2527785897254944, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0676, | |
| "num_tokens": 642601.0, | |
| "reward": 19.66384506225586, | |
| "reward_std": 4.405518531799316, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.66384506225586, | |
| "rewards/skywork_reward/std": 7.860788822174072, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1096.0, | |
| "completions/max_terminated_length": 1096.0, | |
| "completions/mean_length": 509.203125, | |
| "completions/mean_terminated_length": 509.203125, | |
| "completions/min_length": 115.0, | |
| "completions/min_terminated_length": 115.0, | |
| "epoch": 0.002259036144578313, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.23311284184455872, | |
| "learning_rate": 1e-06, | |
| "loss": 0.036, | |
| "num_tokens": 948897.0, | |
| "reward": 19.736499786376953, | |
| "reward_std": 4.317551136016846, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.736499786376953, | |
| "rewards/skywork_reward/std": 8.241270065307617, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1332.0, | |
| "completions/max_terminated_length": 1332.0, | |
| "completions/mean_length": 481.34765625, | |
| "completions/mean_terminated_length": 481.34765625, | |
| "completions/min_length": 144.0, | |
| "completions/min_terminated_length": 144.0, | |
| "epoch": 0.0030120481927710845, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.25114020705223083, | |
| "learning_rate": 1e-06, | |
| "loss": 0.037, | |
| "num_tokens": 1240675.0, | |
| "reward": 19.980758666992188, | |
| "reward_std": 4.497593879699707, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.980758666992188, | |
| "rewards/skywork_reward/std": 7.561793804168701, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1367.0, | |
| "completions/mean_length": 550.08984375, | |
| "completions/mean_terminated_length": 548.1604614257812, | |
| "completions/min_length": 105.0, | |
| "completions/min_terminated_length": 105.0, | |
| "epoch": 0.0037650602409638554, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.27846774458885193, | |
| "learning_rate": 1e-06, | |
| "loss": 0.06, | |
| "num_tokens": 1567393.0, | |
| "reward": 15.45170783996582, | |
| "reward_std": 4.615124702453613, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 15.45170783996582, | |
| "rewards/skywork_reward/std": 7.330972194671631, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1435.0, | |
| "completions/mean_length": 466.416015625, | |
| "completions/mean_terminated_length": 462.2215881347656, | |
| "completions/min_length": 137.0, | |
| "completions/min_terminated_length": 137.0, | |
| "epoch": 0.004518072289156626, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.26218029856681824, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0554, | |
| "num_tokens": 1848294.0, | |
| "reward": 15.858068466186523, | |
| "reward_std": 4.51039981842041, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 15.858068466186523, | |
| "rewards/skywork_reward/std": 8.340972900390625, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1397.0, | |
| "completions/mean_length": 546.90625, | |
| "completions/mean_terminated_length": 544.9706420898438, | |
| "completions/min_length": 127.0, | |
| "completions/min_terminated_length": 127.0, | |
| "epoch": 0.005271084337349397, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2502232789993286, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0183, | |
| "num_tokens": 2176214.0, | |
| "reward": 20.738807678222656, | |
| "reward_std": 4.506719589233398, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.738807678222656, | |
| "rewards/skywork_reward/std": 7.784599781036377, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1344.0, | |
| "completions/mean_length": 507.919921875, | |
| "completions/mean_terminated_length": 505.90802001953125, | |
| "completions/min_length": 149.0, | |
| "completions/min_terminated_length": 149.0, | |
| "epoch": 0.006024096385542169, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24587783217430115, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0108, | |
| "num_tokens": 2483485.0, | |
| "reward": 18.190261840820312, | |
| "reward_std": 4.080215930938721, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.190261840820312, | |
| "rewards/skywork_reward/std": 7.144257068634033, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1392.0, | |
| "completions/mean_length": 517.755859375, | |
| "completions/mean_terminated_length": 513.7627563476562, | |
| "completions/min_length": 147.0, | |
| "completions/min_terminated_length": 147.0, | |
| "epoch": 0.00677710843373494, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.25550204515457153, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0365, | |
| "num_tokens": 2791968.0, | |
| "reward": 18.123830795288086, | |
| "reward_std": 3.959782838821411, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.123830795288086, | |
| "rewards/skywork_reward/std": 8.163817405700684, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1385.0, | |
| "completions/mean_length": 469.869140625, | |
| "completions/mean_terminated_length": 467.78277587890625, | |
| "completions/min_length": 119.0, | |
| "completions/min_terminated_length": 119.0, | |
| "epoch": 0.007530120481927711, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2836010456085205, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0308, | |
| "num_tokens": 3079901.0, | |
| "reward": 19.304351806640625, | |
| "reward_std": 4.372314453125, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.304351806640625, | |
| "rewards/skywork_reward/std": 7.716484069824219, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1485.0, | |
| "completions/max_terminated_length": 1485.0, | |
| "completions/mean_length": 500.615234375, | |
| "completions/mean_terminated_length": 500.615234375, | |
| "completions/min_length": 84.0, | |
| "completions/min_terminated_length": 84.0, | |
| "epoch": 0.008283132530120483, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.246844083070755, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0386, | |
| "num_tokens": 3382520.0, | |
| "reward": 18.477981567382812, | |
| "reward_std": 4.108589172363281, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.477981567382812, | |
| "rewards/skywork_reward/std": 7.084549427032471, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1090.0, | |
| "completions/max_terminated_length": 1090.0, | |
| "completions/mean_length": 478.2578125, | |
| "completions/mean_terminated_length": 478.2578125, | |
| "completions/min_length": 202.0, | |
| "completions/min_terminated_length": 202.0, | |
| "epoch": 0.009036144578313253, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2760355472564697, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0437, | |
| "num_tokens": 3669020.0, | |
| "reward": 18.244699478149414, | |
| "reward_std": 4.524354457855225, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.244699478149414, | |
| "rewards/skywork_reward/std": 7.753305435180664, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1359.0, | |
| "completions/mean_length": 509.525390625, | |
| "completions/mean_terminated_length": 505.5000305175781, | |
| "completions/min_length": 139.0, | |
| "completions/min_terminated_length": 139.0, | |
| "epoch": 0.009789156626506024, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2421659678220749, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0431, | |
| "num_tokens": 3973577.0, | |
| "reward": 18.84097671508789, | |
| "reward_std": 3.6833090782165527, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.84097671508789, | |
| "rewards/skywork_reward/std": 7.0365753173828125, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 986.0, | |
| "completions/max_terminated_length": 986.0, | |
| "completions/mean_length": 502.75, | |
| "completions/mean_terminated_length": 502.75, | |
| "completions/min_length": 201.0, | |
| "completions/min_terminated_length": 201.0, | |
| "epoch": 0.010542168674698794, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24300047755241394, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0095, | |
| "num_tokens": 4275273.0, | |
| "reward": 19.73338508605957, | |
| "reward_std": 3.7505850791931152, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.73338508605957, | |
| "rewards/skywork_reward/std": 7.851949691772461, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1212.0, | |
| "completions/mean_length": 495.01171875, | |
| "completions/mean_terminated_length": 492.97454833984375, | |
| "completions/min_length": 82.0, | |
| "completions/min_terminated_length": 82.0, | |
| "epoch": 0.011295180722891566, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24820354580879211, | |
| "learning_rate": 1e-06, | |
| "loss": 0.041, | |
| "num_tokens": 4572607.0, | |
| "reward": 20.985137939453125, | |
| "reward_std": 3.9907798767089844, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.985137939453125, | |
| "rewards/skywork_reward/std": 7.5082783699035645, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1452.0, | |
| "completions/max_terminated_length": 1452.0, | |
| "completions/mean_length": 503.646484375, | |
| "completions/mean_terminated_length": 503.646484375, | |
| "completions/min_length": 167.0, | |
| "completions/min_terminated_length": 167.0, | |
| "epoch": 0.012048192771084338, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.25032636523246765, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0271, | |
| "num_tokens": 4879242.0, | |
| "reward": 19.479820251464844, | |
| "reward_std": 3.9832570552825928, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.479820251464844, | |
| "rewards/skywork_reward/std": 7.7815022468566895, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1505.0, | |
| "completions/mean_length": 548.275390625, | |
| "completions/mean_terminated_length": 546.3424682617188, | |
| "completions/min_length": 183.0, | |
| "completions/min_terminated_length": 183.0, | |
| "epoch": 0.012801204819277108, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.23289093375205994, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0344, | |
| "num_tokens": 5206471.0, | |
| "reward": 18.09130859375, | |
| "reward_std": 4.67989444732666, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.09130859375, | |
| "rewards/skywork_reward/std": 8.046331405639648, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1333.0, | |
| "completions/max_terminated_length": 1333.0, | |
| "completions/mean_length": 500.513671875, | |
| "completions/mean_terminated_length": 500.513671875, | |
| "completions/min_length": 105.0, | |
| "completions/min_terminated_length": 105.0, | |
| "epoch": 0.01355421686746988, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2743029296398163, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0608, | |
| "num_tokens": 5508110.0, | |
| "reward": 19.004474639892578, | |
| "reward_std": 4.812801361083984, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.004474639892578, | |
| "rewards/skywork_reward/std": 7.56382942199707, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1300.0, | |
| "completions/mean_length": 493.69921875, | |
| "completions/mean_terminated_length": 491.65948486328125, | |
| "completions/min_length": 173.0, | |
| "completions/min_terminated_length": 173.0, | |
| "epoch": 0.01430722891566265, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24759620428085327, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0399, | |
| "num_tokens": 5802532.0, | |
| "reward": 20.18354034423828, | |
| "reward_std": 4.097359657287598, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.18354034423828, | |
| "rewards/skywork_reward/std": 6.95109224319458, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1506.0, | |
| "completions/mean_length": 540.8828125, | |
| "completions/mean_terminated_length": 536.9804077148438, | |
| "completions/min_length": 141.0, | |
| "completions/min_terminated_length": 141.0, | |
| "epoch": 0.015060240963855422, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.22581446170806885, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0349, | |
| "num_tokens": 6127912.0, | |
| "reward": 18.810096740722656, | |
| "reward_std": 4.296095848083496, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.810096740722656, | |
| "rewards/skywork_reward/std": 7.788106918334961, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1486.0, | |
| "completions/mean_length": 568.685546875, | |
| "completions/mean_terminated_length": 562.9843139648438, | |
| "completions/min_length": 141.0, | |
| "completions/min_terminated_length": 141.0, | |
| "epoch": 0.01581325301204819, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.25190842151641846, | |
| "learning_rate": 1e-06, | |
| "loss": 0.031, | |
| "num_tokens": 6462007.0, | |
| "reward": 16.9500675201416, | |
| "reward_std": 3.961681842803955, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 16.9500675201416, | |
| "rewards/skywork_reward/std": 7.946998119354248, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1516.0, | |
| "completions/mean_length": 536.982421875, | |
| "completions/mean_terminated_length": 535.0274047851562, | |
| "completions/min_length": 138.0, | |
| "completions/min_terminated_length": 138.0, | |
| "epoch": 0.016566265060240965, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.23905393481254578, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0237, | |
| "num_tokens": 6777998.0, | |
| "reward": 19.38282012939453, | |
| "reward_std": 3.568166494369507, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.38282012939453, | |
| "rewards/skywork_reward/std": 8.19947624206543, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1442.0, | |
| "completions/mean_length": 564.58203125, | |
| "completions/mean_terminated_length": 562.6810302734375, | |
| "completions/min_length": 91.0, | |
| "completions/min_terminated_length": 91.0, | |
| "epoch": 0.017319277108433735, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24202950298786163, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0379, | |
| "num_tokens": 7113960.0, | |
| "reward": 17.542194366455078, | |
| "reward_std": 3.6086037158966064, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 17.542194366455078, | |
| "rewards/skywork_reward/std": 6.972787857055664, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.009765625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1526.0, | |
| "completions/mean_length": 594.02734375, | |
| "completions/mean_terminated_length": 584.7376708984375, | |
| "completions/min_length": 157.0, | |
| "completions/min_terminated_length": 157.0, | |
| "epoch": 0.018072289156626505, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.23193013668060303, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0177, | |
| "num_tokens": 7464662.0, | |
| "reward": 19.02389907836914, | |
| "reward_std": 4.250513076782227, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.02389907836914, | |
| "rewards/skywork_reward/std": 6.219736576080322, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1258.0, | |
| "completions/mean_length": 549.28125, | |
| "completions/mean_terminated_length": 547.3502807617188, | |
| "completions/min_length": 244.0, | |
| "completions/min_terminated_length": 244.0, | |
| "epoch": 0.01882530120481928, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24706406891345978, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0544, | |
| "num_tokens": 7789846.0, | |
| "reward": 18.385107040405273, | |
| "reward_std": 4.854748249053955, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.385107040405273, | |
| "rewards/skywork_reward/std": 7.241014003753662, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.009765625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1387.0, | |
| "completions/mean_length": 531.689453125, | |
| "completions/mean_terminated_length": 521.7850341796875, | |
| "completions/min_length": 180.0, | |
| "completions/min_terminated_length": 180.0, | |
| "epoch": 0.01957831325301205, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2525196373462677, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0523, | |
| "num_tokens": 8111927.0, | |
| "reward": 19.272109985351562, | |
| "reward_std": 3.762904167175293, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.272109985351562, | |
| "rewards/skywork_reward/std": 7.09940767288208, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1308.0, | |
| "completions/mean_length": 547.365234375, | |
| "completions/mean_terminated_length": 545.4305419921875, | |
| "completions/min_length": 145.0, | |
| "completions/min_terminated_length": 145.0, | |
| "epoch": 0.02033132530120482, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2493782937526703, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0507, | |
| "num_tokens": 8435202.0, | |
| "reward": 17.78737449645996, | |
| "reward_std": 4.015490531921387, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 17.78737449645996, | |
| "rewards/skywork_reward/std": 7.691453456878662, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1301.0, | |
| "completions/max_terminated_length": 1301.0, | |
| "completions/mean_length": 478.685546875, | |
| "completions/mean_terminated_length": 478.685546875, | |
| "completions/min_length": 149.0, | |
| "completions/min_terminated_length": 149.0, | |
| "epoch": 0.02108433734939759, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.26527339220046997, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0339, | |
| "num_tokens": 8727633.0, | |
| "reward": 18.366004943847656, | |
| "reward_std": 3.85819411277771, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.366004943847656, | |
| "rewards/skywork_reward/std": 6.539942264556885, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1473.0, | |
| "completions/max_terminated_length": 1473.0, | |
| "completions/mean_length": 509.15234375, | |
| "completions/mean_terminated_length": 509.15234375, | |
| "completions/min_length": 99.0, | |
| "completions/min_terminated_length": 99.0, | |
| "epoch": 0.021837349397590362, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2530669867992401, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0226, | |
| "num_tokens": 9036511.0, | |
| "reward": 18.36314582824707, | |
| "reward_std": 3.6422839164733887, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.36314582824707, | |
| "rewards/skywork_reward/std": 7.377774715423584, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1452.0, | |
| "completions/mean_length": 523.609375, | |
| "completions/mean_terminated_length": 521.628173828125, | |
| "completions/min_length": 139.0, | |
| "completions/min_terminated_length": 139.0, | |
| "epoch": 0.022590361445783132, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24128130078315735, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0412, | |
| "num_tokens": 9345751.0, | |
| "reward": 20.08295440673828, | |
| "reward_std": 3.875840902328491, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.08295440673828, | |
| "rewards/skywork_reward/std": 7.567595958709717, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1503.0, | |
| "completions/mean_length": 516.138671875, | |
| "completions/mean_terminated_length": 512.1392211914062, | |
| "completions/min_length": 169.0, | |
| "completions/min_terminated_length": 169.0, | |
| "epoch": 0.023343373493975902, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2687889337539673, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0406, | |
| "num_tokens": 9651646.0, | |
| "reward": 18.440399169921875, | |
| "reward_std": 4.034783363342285, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.440399169921875, | |
| "rewards/skywork_reward/std": 9.241204261779785, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1351.0, | |
| "completions/max_terminated_length": 1351.0, | |
| "completions/mean_length": 509.87109375, | |
| "completions/mean_terminated_length": 509.87109375, | |
| "completions/min_length": 136.0, | |
| "completions/min_terminated_length": 136.0, | |
| "epoch": 0.024096385542168676, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2565000653266907, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0341, | |
| "num_tokens": 9958156.0, | |
| "reward": 21.218032836914062, | |
| "reward_std": 3.8647594451904297, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.218032836914062, | |
| "rewards/skywork_reward/std": 7.692581653594971, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1272.0, | |
| "completions/mean_length": 483.21484375, | |
| "completions/mean_terminated_length": 481.15460205078125, | |
| "completions/min_length": 47.0, | |
| "completions/min_terminated_length": 47.0, | |
| "epoch": 0.024849397590361446, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2624817490577698, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0431, | |
| "num_tokens": 10249610.0, | |
| "reward": 19.79848861694336, | |
| "reward_std": 4.353243827819824, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.79848861694336, | |
| "rewards/skywork_reward/std": 7.000552654266357, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1237.0, | |
| "completions/max_terminated_length": 1237.0, | |
| "completions/mean_length": 479.90234375, | |
| "completions/mean_terminated_length": 479.90234375, | |
| "completions/min_length": 135.0, | |
| "completions/min_terminated_length": 135.0, | |
| "epoch": 0.025602409638554216, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2779456377029419, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0414, | |
| "num_tokens": 10539784.0, | |
| "reward": 18.208389282226562, | |
| "reward_std": 3.7550158500671387, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.208389282226562, | |
| "rewards/skywork_reward/std": 7.102542400360107, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1438.0, | |
| "completions/max_terminated_length": 1438.0, | |
| "completions/mean_length": 538.0859375, | |
| "completions/mean_terminated_length": 538.0859375, | |
| "completions/min_length": 181.0, | |
| "completions/min_terminated_length": 181.0, | |
| "epoch": 0.02635542168674699, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.27090051770210266, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0616, | |
| "num_tokens": 10861060.0, | |
| "reward": 18.551515579223633, | |
| "reward_std": 4.775388717651367, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.551515579223633, | |
| "rewards/skywork_reward/std": 8.144341468811035, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1414.0, | |
| "completions/mean_length": 523.318359375, | |
| "completions/mean_terminated_length": 521.3366088867188, | |
| "completions/min_length": 161.0, | |
| "completions/min_terminated_length": 161.0, | |
| "epoch": 0.02710843373493976, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24933283030986786, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0219, | |
| "num_tokens": 11174711.0, | |
| "reward": 19.23906707763672, | |
| "reward_std": 3.579415798187256, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.23906707763672, | |
| "rewards/skywork_reward/std": 6.7693352699279785, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1471.0, | |
| "completions/max_terminated_length": 1471.0, | |
| "completions/mean_length": 540.38671875, | |
| "completions/mean_terminated_length": 540.38671875, | |
| "completions/min_length": 112.0, | |
| "completions/min_terminated_length": 112.0, | |
| "epoch": 0.02786144578313253, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2600509524345398, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0552, | |
| "num_tokens": 11499629.0, | |
| "reward": 17.48117446899414, | |
| "reward_std": 4.680095672607422, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 17.48117446899414, | |
| "rewards/skywork_reward/std": 7.522308826446533, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1324.0, | |
| "completions/mean_length": 527.3359375, | |
| "completions/mean_terminated_length": 525.362060546875, | |
| "completions/min_length": 178.0, | |
| "completions/min_terminated_length": 178.0, | |
| "epoch": 0.0286144578313253, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2569137215614319, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0256, | |
| "num_tokens": 11812185.0, | |
| "reward": 19.502559661865234, | |
| "reward_std": 4.2003655433654785, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.502559661865234, | |
| "rewards/skywork_reward/std": 7.574954986572266, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1497.0, | |
| "completions/mean_length": 546.02734375, | |
| "completions/mean_terminated_length": 540.1925659179688, | |
| "completions/min_length": 201.0, | |
| "completions/min_terminated_length": 201.0, | |
| "epoch": 0.029367469879518073, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.25866448879241943, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0702, | |
| "num_tokens": 12136471.0, | |
| "reward": 19.45880889892578, | |
| "reward_std": 4.22914457321167, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.45880889892578, | |
| "rewards/skywork_reward/std": 7.417150497436523, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1300.0, | |
| "completions/mean_length": 549.212890625, | |
| "completions/mean_terminated_length": 543.3968505859375, | |
| "completions/min_length": 106.0, | |
| "completions/min_terminated_length": 106.0, | |
| "epoch": 0.030120481927710843, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.26242557168006897, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0572, | |
| "num_tokens": 12468676.0, | |
| "reward": 18.719432830810547, | |
| "reward_std": 4.223837852478027, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.719432830810547, | |
| "rewards/skywork_reward/std": 7.981883525848389, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1474.0, | |
| "completions/mean_length": 592.703125, | |
| "completions/mean_terminated_length": 587.1434326171875, | |
| "completions/min_length": 206.0, | |
| "completions/min_terminated_length": 206.0, | |
| "epoch": 0.030873493975903613, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.23195688426494598, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0277, | |
| "num_tokens": 12819564.0, | |
| "reward": 19.721298217773438, | |
| "reward_std": 4.098422527313232, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.721298217773438, | |
| "rewards/skywork_reward/std": 8.307689666748047, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1385.0, | |
| "completions/mean_length": 505.2734375, | |
| "completions/mean_terminated_length": 501.2314147949219, | |
| "completions/min_length": 163.0, | |
| "completions/min_terminated_length": 163.0, | |
| "epoch": 0.03162650602409638, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.26936817169189453, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0703, | |
| "num_tokens": 13120760.0, | |
| "reward": 19.512004852294922, | |
| "reward_std": 4.078932762145996, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.512004852294922, | |
| "rewards/skywork_reward/std": 7.664077281951904, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1478.0, | |
| "completions/mean_length": 525.787109375, | |
| "completions/mean_terminated_length": 523.8101806640625, | |
| "completions/min_length": 132.0, | |
| "completions/min_terminated_length": 132.0, | |
| "epoch": 0.03237951807228916, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.25450485944747925, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0453, | |
| "num_tokens": 13435275.0, | |
| "reward": 19.4080810546875, | |
| "reward_std": 4.121971130371094, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.4080810546875, | |
| "rewards/skywork_reward/std": 8.246720314025879, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.025390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1514.0, | |
| "completions/mean_length": 598.4140625, | |
| "completions/mean_terminated_length": 573.9879760742188, | |
| "completions/min_length": 203.0, | |
| "completions/min_terminated_length": 203.0, | |
| "epoch": 0.03313253012048193, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.22897975146770477, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0552, | |
| "num_tokens": 13792623.0, | |
| "reward": 19.808815002441406, | |
| "reward_std": 4.206421375274658, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.808815002441406, | |
| "rewards/skywork_reward/std": 8.275568008422852, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.02734375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1531.0, | |
| "completions/mean_length": 540.21875, | |
| "completions/mean_terminated_length": 512.224853515625, | |
| "completions/min_length": 122.0, | |
| "completions/min_terminated_length": 122.0, | |
| "epoch": 0.0338855421686747, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.26464077830314636, | |
| "learning_rate": 1e-06, | |
| "loss": 0.084, | |
| "num_tokens": 14114543.0, | |
| "reward": 19.398923873901367, | |
| "reward_std": 4.753540992736816, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.398923873901367, | |
| "rewards/skywork_reward/std": 8.964569091796875, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1331.0, | |
| "completions/mean_length": 550.970703125, | |
| "completions/mean_terminated_length": 543.2145385742188, | |
| "completions/min_length": 141.0, | |
| "completions/min_terminated_length": 141.0, | |
| "epoch": 0.03463855421686747, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.26099711656570435, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0685, | |
| "num_tokens": 14438880.0, | |
| "reward": 20.744400024414062, | |
| "reward_std": 4.4266676902771, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.744400024414062, | |
| "rewards/skywork_reward/std": 9.021385192871094, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1476.0, | |
| "completions/mean_length": 555.751953125, | |
| "completions/mean_terminated_length": 553.8336791992188, | |
| "completions/min_length": 159.0, | |
| "completions/min_terminated_length": 159.0, | |
| "epoch": 0.035391566265060244, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.23918218910694122, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0153, | |
| "num_tokens": 14773041.0, | |
| "reward": 18.19009017944336, | |
| "reward_std": 3.2478275299072266, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.19009017944336, | |
| "rewards/skywork_reward/std": 8.022202491760254, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1381.0, | |
| "completions/mean_length": 504.818359375, | |
| "completions/mean_terminated_length": 502.8003845214844, | |
| "completions/min_length": 163.0, | |
| "completions/min_terminated_length": 163.0, | |
| "epoch": 0.03614457831325301, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2639124095439911, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0118, | |
| "num_tokens": 15076308.0, | |
| "reward": 18.888355255126953, | |
| "reward_std": 3.525162696838379, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.888355255126953, | |
| "rewards/skywork_reward/std": 7.233704090118408, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1477.0, | |
| "completions/mean_length": 558.080078125, | |
| "completions/mean_terminated_length": 552.3163452148438, | |
| "completions/min_length": 171.0, | |
| "completions/min_terminated_length": 171.0, | |
| "epoch": 0.036897590361445784, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2637381851673126, | |
| "learning_rate": 1e-06, | |
| "loss": 0.061, | |
| "num_tokens": 15408557.0, | |
| "reward": 18.518508911132812, | |
| "reward_std": 4.11885929107666, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.518508911132812, | |
| "rewards/skywork_reward/std": 7.327213287353516, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1511.0, | |
| "completions/mean_length": 519.8359375, | |
| "completions/mean_terminated_length": 503.7063903808594, | |
| "completions/min_length": 216.0, | |
| "completions/min_terminated_length": 216.0, | |
| "epoch": 0.03765060240963856, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2628968060016632, | |
| "learning_rate": 1e-06, | |
| "loss": 0.052, | |
| "num_tokens": 15719481.0, | |
| "reward": 19.84031105041504, | |
| "reward_std": 3.317889928817749, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.84031105041504, | |
| "rewards/skywork_reward/std": 7.0079755783081055, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1354.0, | |
| "completions/max_terminated_length": 1354.0, | |
| "completions/mean_length": 460.279296875, | |
| "completions/mean_terminated_length": 460.279296875, | |
| "completions/min_length": 109.0, | |
| "completions/min_terminated_length": 109.0, | |
| "epoch": 0.038403614457831324, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.279048889875412, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0238, | |
| "num_tokens": 16001848.0, | |
| "reward": 20.461505889892578, | |
| "reward_std": 3.4711790084838867, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.461505889892578, | |
| "rewards/skywork_reward/std": 6.913259506225586, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1500.0, | |
| "completions/max_terminated_length": 1500.0, | |
| "completions/mean_length": 550.14453125, | |
| "completions/mean_terminated_length": 550.14453125, | |
| "completions/min_length": 184.0, | |
| "completions/min_terminated_length": 184.0, | |
| "epoch": 0.0391566265060241, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24522554874420166, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0237, | |
| "num_tokens": 16331186.0, | |
| "reward": 19.905433654785156, | |
| "reward_std": 3.6912999153137207, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.905433654785156, | |
| "rewards/skywork_reward/std": 7.08914852142334, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1462.0, | |
| "completions/mean_length": 586.515625, | |
| "completions/mean_terminated_length": 582.7921752929688, | |
| "completions/min_length": 185.0, | |
| "completions/min_terminated_length": 185.0, | |
| "epoch": 0.039909638554216864, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2638297379016876, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0588, | |
| "num_tokens": 16675610.0, | |
| "reward": 19.24047088623047, | |
| "reward_std": 4.73069953918457, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.24047088623047, | |
| "rewards/skywork_reward/std": 8.525131225585938, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1396.0, | |
| "completions/max_terminated_length": 1396.0, | |
| "completions/mean_length": 524.859375, | |
| "completions/mean_terminated_length": 524.859375, | |
| "completions/min_length": 148.0, | |
| "completions/min_terminated_length": 148.0, | |
| "epoch": 0.04066265060240964, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.27795007824897766, | |
| "learning_rate": 1e-06, | |
| "loss": 0.033, | |
| "num_tokens": 16987650.0, | |
| "reward": 18.526611328125, | |
| "reward_std": 4.1653361320495605, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.526611328125, | |
| "rewards/skywork_reward/std": 6.746675968170166, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1436.0, | |
| "completions/mean_length": 551.83203125, | |
| "completions/mean_terminated_length": 549.9060668945312, | |
| "completions/min_length": 188.0, | |
| "completions/min_terminated_length": 188.0, | |
| "epoch": 0.04141566265060241, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.26270273327827454, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0523, | |
| "num_tokens": 17316684.0, | |
| "reward": 18.824560165405273, | |
| "reward_std": 3.540390968322754, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.824560165405273, | |
| "rewards/skywork_reward/std": 8.527042388916016, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1397.0, | |
| "completions/mean_length": 575.982421875, | |
| "completions/mean_terminated_length": 572.2176513671875, | |
| "completions/min_length": 179.0, | |
| "completions/min_terminated_length": 179.0, | |
| "epoch": 0.04216867469879518, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24960987269878387, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0122, | |
| "num_tokens": 17659123.0, | |
| "reward": 19.136398315429688, | |
| "reward_std": 3.8432905673980713, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.136398315429688, | |
| "rewards/skywork_reward/std": 8.282490730285645, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1534.0, | |
| "completions/mean_length": 542.234375, | |
| "completions/mean_terminated_length": 540.2896118164062, | |
| "completions/min_length": 159.0, | |
| "completions/min_terminated_length": 159.0, | |
| "epoch": 0.04292168674698795, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.25888824462890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0212, | |
| "num_tokens": 17983035.0, | |
| "reward": 19.597389221191406, | |
| "reward_std": 3.953815221786499, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.597389221191406, | |
| "rewards/skywork_reward/std": 8.315513610839844, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1502.0, | |
| "completions/max_terminated_length": 1502.0, | |
| "completions/mean_length": 521.236328125, | |
| "completions/mean_terminated_length": 521.236328125, | |
| "completions/min_length": 115.0, | |
| "completions/min_terminated_length": 115.0, | |
| "epoch": 0.043674698795180725, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2533751130104065, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0332, | |
| "num_tokens": 18294164.0, | |
| "reward": 20.750991821289062, | |
| "reward_std": 3.6470861434936523, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.750991821289062, | |
| "rewards/skywork_reward/std": 6.336122035980225, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1522.0, | |
| "completions/mean_length": 568.337890625, | |
| "completions/mean_terminated_length": 562.6345825195312, | |
| "completions/min_length": 189.0, | |
| "completions/min_terminated_length": 189.0, | |
| "epoch": 0.04442771084337349, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2571529150009155, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0495, | |
| "num_tokens": 18633617.0, | |
| "reward": 17.738445281982422, | |
| "reward_std": 3.885385751724243, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 17.738445281982422, | |
| "rewards/skywork_reward/std": 7.068282127380371, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1509.0, | |
| "completions/mean_length": 504.2734375, | |
| "completions/mean_terminated_length": 502.25439453125, | |
| "completions/min_length": 172.0, | |
| "completions/min_terminated_length": 172.0, | |
| "epoch": 0.045180722891566265, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2818680703639984, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0518, | |
| "num_tokens": 18939565.0, | |
| "reward": 19.56861114501953, | |
| "reward_std": 3.992223024368286, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.56861114501953, | |
| "rewards/skywork_reward/std": 7.308934688568115, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1499.0, | |
| "completions/mean_length": 550.580078125, | |
| "completions/mean_terminated_length": 546.7156982421875, | |
| "completions/min_length": 107.0, | |
| "completions/min_terminated_length": 107.0, | |
| "epoch": 0.04593373493975904, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2757847309112549, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0732, | |
| "num_tokens": 19263446.0, | |
| "reward": 18.18868637084961, | |
| "reward_std": 4.381465911865234, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.18868637084961, | |
| "rewards/skywork_reward/std": 7.37313175201416, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1189.0, | |
| "completions/max_terminated_length": 1189.0, | |
| "completions/mean_length": 484.359375, | |
| "completions/mean_terminated_length": 484.359375, | |
| "completions/min_length": 178.0, | |
| "completions/min_terminated_length": 178.0, | |
| "epoch": 0.046686746987951805, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2803126275539398, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0489, | |
| "num_tokens": 19560014.0, | |
| "reward": 20.43779754638672, | |
| "reward_std": 4.052783489227295, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.43779754638672, | |
| "rewards/skywork_reward/std": 6.660309314727783, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1449.0, | |
| "completions/max_terminated_length": 1449.0, | |
| "completions/mean_length": 572.158203125, | |
| "completions/mean_terminated_length": 572.158203125, | |
| "completions/min_length": 197.0, | |
| "completions/min_terminated_length": 197.0, | |
| "epoch": 0.04743975903614458, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.25800344347953796, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0317, | |
| "num_tokens": 19902143.0, | |
| "reward": 20.61004638671875, | |
| "reward_std": 3.9054417610168457, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.61004638671875, | |
| "rewards/skywork_reward/std": 7.143301010131836, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1434.0, | |
| "completions/max_terminated_length": 1434.0, | |
| "completions/mean_length": 531.67578125, | |
| "completions/mean_terminated_length": 531.67578125, | |
| "completions/min_length": 168.0, | |
| "completions/min_terminated_length": 168.0, | |
| "epoch": 0.04819277108433735, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2537233829498291, | |
| "learning_rate": 1e-06, | |
| "loss": 0.013, | |
| "num_tokens": 20217081.0, | |
| "reward": 22.289291381835938, | |
| "reward_std": 3.0658791065216064, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.289291381835938, | |
| "rewards/skywork_reward/std": 7.240992069244385, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1506.0, | |
| "completions/mean_length": 574.796875, | |
| "completions/mean_terminated_length": 559.5397338867188, | |
| "completions/min_length": 173.0, | |
| "completions/min_terminated_length": 173.0, | |
| "epoch": 0.04894578313253012, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2763891816139221, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0475, | |
| "num_tokens": 20557233.0, | |
| "reward": 21.378662109375, | |
| "reward_std": 4.32180118560791, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.378662109375, | |
| "rewards/skywork_reward/std": 8.400932312011719, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1176.0, | |
| "completions/max_terminated_length": 1176.0, | |
| "completions/mean_length": 519.951171875, | |
| "completions/mean_terminated_length": 519.951171875, | |
| "completions/min_length": 124.0, | |
| "completions/min_terminated_length": 124.0, | |
| "epoch": 0.04969879518072289, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2865884304046631, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0077, | |
| "num_tokens": 20867336.0, | |
| "reward": 22.28472900390625, | |
| "reward_std": 3.520258665084839, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.28472900390625, | |
| "rewards/skywork_reward/std": 7.874880313873291, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1073.0, | |
| "completions/max_terminated_length": 1073.0, | |
| "completions/mean_length": 483.09765625, | |
| "completions/mean_terminated_length": 483.09765625, | |
| "completions/min_length": 109.0, | |
| "completions/min_terminated_length": 109.0, | |
| "epoch": 0.050451807228915665, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.30400651693344116, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0363, | |
| "num_tokens": 21158026.0, | |
| "reward": 20.857650756835938, | |
| "reward_std": 3.738304615020752, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.857650756835938, | |
| "rewards/skywork_reward/std": 7.2754058837890625, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1195.0, | |
| "completions/max_terminated_length": 1195.0, | |
| "completions/mean_length": 529.998046875, | |
| "completions/mean_terminated_length": 529.998046875, | |
| "completions/min_length": 161.0, | |
| "completions/min_terminated_length": 161.0, | |
| "epoch": 0.05120481927710843, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2766510546207428, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0479, | |
| "num_tokens": 21474633.0, | |
| "reward": 20.8536434173584, | |
| "reward_std": 3.9283032417297363, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.8536434173584, | |
| "rewards/skywork_reward/std": 7.341203689575195, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1532.0, | |
| "completions/mean_length": 554.416015625, | |
| "completions/mean_terminated_length": 550.5667114257812, | |
| "completions/min_length": 154.0, | |
| "completions/min_terminated_length": 154.0, | |
| "epoch": 0.051957831325301206, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2611565887928009, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0488, | |
| "num_tokens": 21798510.0, | |
| "reward": 19.972572326660156, | |
| "reward_std": 3.5066475868225098, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.972572326660156, | |
| "rewards/skywork_reward/std": 7.777656555175781, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1522.0, | |
| "completions/mean_length": 555.4453125, | |
| "completions/mean_terminated_length": 549.666015625, | |
| "completions/min_length": 165.0, | |
| "completions/min_terminated_length": 165.0, | |
| "epoch": 0.05271084337349398, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.24628393352031708, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0382, | |
| "num_tokens": 22124706.0, | |
| "reward": 21.486955642700195, | |
| "reward_std": 3.622591257095337, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.486955642700195, | |
| "rewards/skywork_reward/std": 7.51347541809082, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1233.0, | |
| "completions/mean_length": 549.400390625, | |
| "completions/mean_terminated_length": 547.4696655273438, | |
| "completions/min_length": 151.0, | |
| "completions/min_terminated_length": 151.0, | |
| "epoch": 0.053463855421686746, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.26588577032089233, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0084, | |
| "num_tokens": 22455471.0, | |
| "reward": 20.11474609375, | |
| "reward_std": 3.2167859077453613, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.11474609375, | |
| "rewards/skywork_reward/std": 6.900139808654785, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1421.0, | |
| "completions/mean_length": 515.904296875, | |
| "completions/mean_terminated_length": 513.9080200195312, | |
| "completions/min_length": 198.0, | |
| "completions/min_terminated_length": 198.0, | |
| "epoch": 0.05421686746987952, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.28270772099494934, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0185, | |
| "num_tokens": 22765118.0, | |
| "reward": 21.868011474609375, | |
| "reward_std": 3.350029468536377, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.868011474609375, | |
| "rewards/skywork_reward/std": 6.04210090637207, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1175.0, | |
| "completions/max_terminated_length": 1175.0, | |
| "completions/mean_length": 571.068359375, | |
| "completions/mean_terminated_length": 571.068359375, | |
| "completions/min_length": 203.0, | |
| "completions/min_terminated_length": 203.0, | |
| "epoch": 0.054969879518072286, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.263434499502182, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0321, | |
| "num_tokens": 23103025.0, | |
| "reward": 19.462799072265625, | |
| "reward_std": 3.916111707687378, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.462799072265625, | |
| "rewards/skywork_reward/std": 8.177480697631836, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1504.0, | |
| "completions/mean_length": 615.650390625, | |
| "completions/mean_terminated_length": 612.0411987304688, | |
| "completions/min_length": 206.0, | |
| "completions/min_terminated_length": 206.0, | |
| "epoch": 0.05572289156626506, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2504398822784424, | |
| "learning_rate": 1e-06, | |
| "loss": 0.024, | |
| "num_tokens": 23460030.0, | |
| "reward": 22.602195739746094, | |
| "reward_std": 4.115928649902344, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.602195739746094, | |
| "rewards/skywork_reward/std": 7.8947224617004395, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1517.0, | |
| "completions/max_terminated_length": 1517.0, | |
| "completions/mean_length": 550.38671875, | |
| "completions/mean_terminated_length": 550.38671875, | |
| "completions/min_length": 176.0, | |
| "completions/min_terminated_length": 176.0, | |
| "epoch": 0.05647590361445783, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.27360963821411133, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0359, | |
| "num_tokens": 23787700.0, | |
| "reward": 19.700706481933594, | |
| "reward_std": 3.7519848346710205, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.700706481933594, | |
| "rewards/skywork_reward/std": 6.8410749435424805, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1494.0, | |
| "completions/max_terminated_length": 1494.0, | |
| "completions/mean_length": 555.943359375, | |
| "completions/mean_terminated_length": 555.943359375, | |
| "completions/min_length": 165.0, | |
| "completions/min_terminated_length": 165.0, | |
| "epoch": 0.0572289156626506, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2818851172924042, | |
| "learning_rate": 1e-06, | |
| "loss": 0.048, | |
| "num_tokens": 24116295.0, | |
| "reward": 20.025314331054688, | |
| "reward_std": 3.705512285232544, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.025314331054688, | |
| "rewards/skywork_reward/std": 6.649584770202637, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1384.0, | |
| "completions/mean_length": 556.890625, | |
| "completions/mean_terminated_length": 554.9745483398438, | |
| "completions/min_length": 103.0, | |
| "completions/min_terminated_length": 103.0, | |
| "epoch": 0.05798192771084337, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.29094648361206055, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0675, | |
| "num_tokens": 24448271.0, | |
| "reward": 19.403905868530273, | |
| "reward_std": 3.9135756492614746, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.403905868530273, | |
| "rewards/skywork_reward/std": 7.241198539733887, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1534.0, | |
| "completions/max_terminated_length": 1534.0, | |
| "completions/mean_length": 556.380859375, | |
| "completions/mean_terminated_length": 556.380859375, | |
| "completions/min_length": 85.0, | |
| "completions/min_terminated_length": 85.0, | |
| "epoch": 0.058734939759036146, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.29406511783599854, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0525, | |
| "num_tokens": 24778050.0, | |
| "reward": 20.131879806518555, | |
| "reward_std": 3.6868181228637695, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.131879806518555, | |
| "rewards/skywork_reward/std": 6.814955711364746, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.013671875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1524.0, | |
| "completions/mean_length": 654.173828125, | |
| "completions/mean_terminated_length": 641.9505004882812, | |
| "completions/min_length": 247.0, | |
| "completions/min_terminated_length": 247.0, | |
| "epoch": 0.05948795180722891, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.713456153869629, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0458, | |
| "num_tokens": 25163019.0, | |
| "reward": 19.8343563079834, | |
| "reward_std": 3.734424114227295, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.8343563079834, | |
| "rewards/skywork_reward/std": 7.607394695281982, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1445.0, | |
| "completions/mean_length": 608.091796875, | |
| "completions/mean_terminated_length": 606.2759399414062, | |
| "completions/min_length": 258.0, | |
| "completions/min_terminated_length": 258.0, | |
| "epoch": 0.060240963855421686, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2803974449634552, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0548, | |
| "num_tokens": 25519930.0, | |
| "reward": 20.908302307128906, | |
| "reward_std": 4.699563980102539, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.908302307128906, | |
| "rewards/skywork_reward/std": 8.098281860351562, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1437.0, | |
| "completions/mean_length": 515.775390625, | |
| "completions/mean_terminated_length": 513.7788696289062, | |
| "completions/min_length": 168.0, | |
| "completions/min_terminated_length": 168.0, | |
| "epoch": 0.06099397590361446, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3042682707309723, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0298, | |
| "num_tokens": 25827463.0, | |
| "reward": 21.09960174560547, | |
| "reward_std": 4.133601188659668, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.09960174560547, | |
| "rewards/skywork_reward/std": 7.604403972625732, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1306.0, | |
| "completions/max_terminated_length": 1306.0, | |
| "completions/mean_length": 542.271484375, | |
| "completions/mean_terminated_length": 542.271484375, | |
| "completions/min_length": 188.0, | |
| "completions/min_terminated_length": 188.0, | |
| "epoch": 0.061746987951807226, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.292880117893219, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0317, | |
| "num_tokens": 26153682.0, | |
| "reward": 19.809471130371094, | |
| "reward_std": 3.9524645805358887, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.809471130371094, | |
| "rewards/skywork_reward/std": 5.885915279388428, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1386.0, | |
| "completions/mean_length": 550.228515625, | |
| "completions/mean_terminated_length": 544.41845703125, | |
| "completions/min_length": 132.0, | |
| "completions/min_terminated_length": 132.0, | |
| "epoch": 0.0625, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.30422526597976685, | |
| "learning_rate": 1e-06, | |
| "loss": 0.031, | |
| "num_tokens": 26487431.0, | |
| "reward": 20.889541625976562, | |
| "reward_std": 3.701859474182129, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.889541625976562, | |
| "rewards/skywork_reward/std": 6.698598384857178, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1312.0, | |
| "completions/mean_length": 571.2109375, | |
| "completions/mean_terminated_length": 567.427490234375, | |
| "completions/min_length": 194.0, | |
| "completions/min_terminated_length": 194.0, | |
| "epoch": 0.06325301204819277, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.304601788520813, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0356, | |
| "num_tokens": 26825955.0, | |
| "reward": 19.412330627441406, | |
| "reward_std": 3.9354281425476074, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.412330627441406, | |
| "rewards/skywork_reward/std": 7.623223304748535, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1232.0, | |
| "completions/max_terminated_length": 1232.0, | |
| "completions/mean_length": 523.302734375, | |
| "completions/mean_terminated_length": 523.302734375, | |
| "completions/min_length": 215.0, | |
| "completions/min_terminated_length": 215.0, | |
| "epoch": 0.06400602409638555, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2882124185562134, | |
| "learning_rate": 1e-06, | |
| "loss": 0.029, | |
| "num_tokens": 27138190.0, | |
| "reward": 21.05793571472168, | |
| "reward_std": 3.831148862838745, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.05793571472168, | |
| "rewards/skywork_reward/std": 7.37190580368042, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1453.0, | |
| "completions/mean_length": 548.119140625, | |
| "completions/mean_terminated_length": 546.1859130859375, | |
| "completions/min_length": 167.0, | |
| "completions/min_terminated_length": 167.0, | |
| "epoch": 0.06475903614457831, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2833157181739807, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0274, | |
| "num_tokens": 27464027.0, | |
| "reward": 21.267467498779297, | |
| "reward_std": 3.6207642555236816, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.267467498779297, | |
| "rewards/skywork_reward/std": 7.28432559967041, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1466.0, | |
| "completions/mean_length": 555.1640625, | |
| "completions/mean_terminated_length": 543.5336303710938, | |
| "completions/min_length": 189.0, | |
| "completions/min_terminated_length": 189.0, | |
| "epoch": 0.06551204819277108, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2782001793384552, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0658, | |
| "num_tokens": 27796975.0, | |
| "reward": 21.733179092407227, | |
| "reward_std": 3.8758647441864014, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.733179092407227, | |
| "rewards/skywork_reward/std": 7.234414577484131, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1435.0, | |
| "completions/max_terminated_length": 1435.0, | |
| "completions/mean_length": 540.154296875, | |
| "completions/mean_terminated_length": 540.154296875, | |
| "completions/min_length": 68.0, | |
| "completions/min_terminated_length": 68.0, | |
| "epoch": 0.06626506024096386, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3396439254283905, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0201, | |
| "num_tokens": 28117070.0, | |
| "reward": 21.417911529541016, | |
| "reward_std": 3.201744556427002, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.417911529541016, | |
| "rewards/skywork_reward/std": 6.518486022949219, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0234375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1520.0, | |
| "completions/mean_length": 639.146484375, | |
| "completions/mean_terminated_length": 617.6220092773438, | |
| "completions/min_length": 229.0, | |
| "completions/min_terminated_length": 229.0, | |
| "epoch": 0.06701807228915663, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2612081468105316, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0429, | |
| "num_tokens": 28487609.0, | |
| "reward": 20.419099807739258, | |
| "reward_std": 3.7477428913116455, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.419099807739258, | |
| "rewards/skywork_reward/std": 7.6364030838012695, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1367.0, | |
| "completions/mean_length": 546.509765625, | |
| "completions/mean_terminated_length": 544.5733642578125, | |
| "completions/min_length": 190.0, | |
| "completions/min_terminated_length": 190.0, | |
| "epoch": 0.0677710843373494, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.31919634342193604, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0386, | |
| "num_tokens": 28814782.0, | |
| "reward": 21.126543045043945, | |
| "reward_std": 3.912198066711426, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.126543045043945, | |
| "rewards/skywork_reward/std": 7.0028533935546875, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1289.0, | |
| "completions/mean_length": 560.15625, | |
| "completions/mean_terminated_length": 556.3294677734375, | |
| "completions/min_length": 194.0, | |
| "completions/min_terminated_length": 194.0, | |
| "epoch": 0.06852409638554217, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3072337508201599, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0445, | |
| "num_tokens": 29144750.0, | |
| "reward": 19.54640007019043, | |
| "reward_std": 3.560420513153076, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.54640007019043, | |
| "rewards/skywork_reward/std": 7.461215019226074, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1483.0, | |
| "completions/mean_length": 601.64453125, | |
| "completions/mean_terminated_length": 599.8160400390625, | |
| "completions/min_length": 170.0, | |
| "completions/min_terminated_length": 170.0, | |
| "epoch": 0.06927710843373494, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2847882807254791, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0469, | |
| "num_tokens": 29494344.0, | |
| "reward": 19.150531768798828, | |
| "reward_std": 3.979829788208008, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.150531768798828, | |
| "rewards/skywork_reward/std": 7.566142559051514, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1462.0, | |
| "completions/mean_length": 602.068359375, | |
| "completions/mean_terminated_length": 600.24072265625, | |
| "completions/min_length": 102.0, | |
| "completions/min_terminated_length": 102.0, | |
| "epoch": 0.07003012048192771, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.26867735385894775, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0211, | |
| "num_tokens": 29846395.0, | |
| "reward": 20.15349578857422, | |
| "reward_std": 3.5188255310058594, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.15349578857422, | |
| "rewards/skywork_reward/std": 6.31872034072876, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1406.0, | |
| "completions/mean_length": 609.859375, | |
| "completions/mean_terminated_length": 608.0469360351562, | |
| "completions/min_length": 287.0, | |
| "completions/min_terminated_length": 287.0, | |
| "epoch": 0.07078313253012049, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.28086456656455994, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0549, | |
| "num_tokens": 30203763.0, | |
| "reward": 22.653549194335938, | |
| "reward_std": 3.634199857711792, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.653549194335938, | |
| "rewards/skywork_reward/std": 7.612020969390869, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1342.0, | |
| "completions/max_terminated_length": 1342.0, | |
| "completions/mean_length": 564.158203125, | |
| "completions/mean_terminated_length": 564.158203125, | |
| "completions/min_length": 129.0, | |
| "completions/min_terminated_length": 129.0, | |
| "epoch": 0.07153614457831325, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3087657690048218, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0215, | |
| "num_tokens": 30535428.0, | |
| "reward": 20.03140640258789, | |
| "reward_std": 3.1922054290771484, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.03140640258789, | |
| "rewards/skywork_reward/std": 6.323237419128418, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.021484375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1477.0, | |
| "completions/mean_length": 591.076171875, | |
| "completions/mean_terminated_length": 570.329345703125, | |
| "completions/min_length": 157.0, | |
| "completions/min_terminated_length": 157.0, | |
| "epoch": 0.07228915662650602, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2983706295490265, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0296, | |
| "num_tokens": 30883611.0, | |
| "reward": 17.560230255126953, | |
| "reward_std": 3.3740811347961426, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 17.560230255126953, | |
| "rewards/skywork_reward/std": 7.882108211517334, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.013671875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1092.0, | |
| "completions/mean_length": 549.53515625, | |
| "completions/mean_terminated_length": 535.8613891601562, | |
| "completions/min_length": 161.0, | |
| "completions/min_terminated_length": 161.0, | |
| "epoch": 0.0730421686746988, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3238271474838257, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0384, | |
| "num_tokens": 31210173.0, | |
| "reward": 20.257064819335938, | |
| "reward_std": 3.4314146041870117, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.257064819335938, | |
| "rewards/skywork_reward/std": 7.943423271179199, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1285.0, | |
| "completions/max_terminated_length": 1285.0, | |
| "completions/mean_length": 505.283203125, | |
| "completions/mean_terminated_length": 505.283203125, | |
| "completions/min_length": 63.0, | |
| "completions/min_terminated_length": 63.0, | |
| "epoch": 0.07379518072289157, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.33978375792503357, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0432, | |
| "num_tokens": 31517790.0, | |
| "reward": 22.60009765625, | |
| "reward_std": 3.893589496612549, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.60009765625, | |
| "rewards/skywork_reward/std": 6.1815080642700195, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1375.0, | |
| "completions/max_terminated_length": 1375.0, | |
| "completions/mean_length": 537.619140625, | |
| "completions/mean_terminated_length": 537.619140625, | |
| "completions/min_length": 143.0, | |
| "completions/min_terminated_length": 143.0, | |
| "epoch": 0.07454819277108433, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3200412094593048, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0187, | |
| "num_tokens": 31837723.0, | |
| "reward": 19.1778564453125, | |
| "reward_std": 3.4576809406280518, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.1778564453125, | |
| "rewards/skywork_reward/std": 8.491567611694336, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1507.0, | |
| "completions/mean_length": 560.501953125, | |
| "completions/mean_terminated_length": 552.8208618164062, | |
| "completions/min_length": 145.0, | |
| "completions/min_terminated_length": 145.0, | |
| "epoch": 0.07530120481927711, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.30854177474975586, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0291, | |
| "num_tokens": 32171164.0, | |
| "reward": 20.515422821044922, | |
| "reward_std": 3.6772897243499756, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.515422821044922, | |
| "rewards/skywork_reward/std": 6.499829292297363, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1443.0, | |
| "completions/mean_length": 553.505859375, | |
| "completions/mean_terminated_length": 551.5831909179688, | |
| "completions/min_length": 136.0, | |
| "completions/min_terminated_length": 136.0, | |
| "epoch": 0.07605421686746988, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3389620780944824, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0467, | |
| "num_tokens": 32497007.0, | |
| "reward": 19.85546875, | |
| "reward_std": 4.108412265777588, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.85546875, | |
| "rewards/skywork_reward/std": 6.893716812133789, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1468.0, | |
| "completions/mean_length": 544.18359375, | |
| "completions/mean_terminated_length": 532.4229736328125, | |
| "completions/min_length": 198.0, | |
| "completions/min_terminated_length": 198.0, | |
| "epoch": 0.07680722891566265, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.31205376982688904, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0342, | |
| "num_tokens": 32825373.0, | |
| "reward": 21.427410125732422, | |
| "reward_std": 4.496371269226074, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.427410125732422, | |
| "rewards/skywork_reward/std": 8.456439018249512, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1345.0, | |
| "completions/max_terminated_length": 1345.0, | |
| "completions/mean_length": 530.46875, | |
| "completions/mean_terminated_length": 530.46875, | |
| "completions/min_length": 186.0, | |
| "completions/min_terminated_length": 186.0, | |
| "epoch": 0.07756024096385543, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3013162314891815, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0147, | |
| "num_tokens": 33142141.0, | |
| "reward": 20.615509033203125, | |
| "reward_std": 2.9499316215515137, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.615509033203125, | |
| "rewards/skywork_reward/std": 5.820370674133301, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1393.0, | |
| "completions/mean_length": 543.732421875, | |
| "completions/mean_terminated_length": 541.7905883789062, | |
| "completions/min_length": 178.0, | |
| "completions/min_terminated_length": 178.0, | |
| "epoch": 0.0783132530120482, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.31271547079086304, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0393, | |
| "num_tokens": 33467588.0, | |
| "reward": 20.930374145507812, | |
| "reward_std": 3.4611310958862305, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.930374145507812, | |
| "rewards/skywork_reward/std": 6.854768753051758, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1375.0, | |
| "completions/max_terminated_length": 1375.0, | |
| "completions/mean_length": 561.560546875, | |
| "completions/mean_terminated_length": 561.560546875, | |
| "completions/min_length": 219.0, | |
| "completions/min_terminated_length": 219.0, | |
| "epoch": 0.07906626506024096, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.30660298466682434, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0255, | |
| "num_tokens": 33798691.0, | |
| "reward": 17.608062744140625, | |
| "reward_std": 3.2770185470581055, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 17.608062744140625, | |
| "rewards/skywork_reward/std": 7.616006851196289, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.017578125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1508.0, | |
| "completions/mean_length": 541.017578125, | |
| "completions/mean_terminated_length": 523.2146606445312, | |
| "completions/min_length": 126.0, | |
| "completions/min_terminated_length": 126.0, | |
| "epoch": 0.07981927710843373, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.32308229804039, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0226, | |
| "num_tokens": 34123004.0, | |
| "reward": 20.36962890625, | |
| "reward_std": 3.120267152786255, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.36962890625, | |
| "rewards/skywork_reward/std": 7.793717861175537, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1401.0, | |
| "completions/mean_length": 521.126953125, | |
| "completions/mean_terminated_length": 519.140869140625, | |
| "completions/min_length": 114.0, | |
| "completions/min_terminated_length": 114.0, | |
| "epoch": 0.08057228915662651, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.31717127561569214, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0358, | |
| "num_tokens": 34435741.0, | |
| "reward": 21.01158905029297, | |
| "reward_std": 3.5439646244049072, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.01158905029297, | |
| "rewards/skywork_reward/std": 8.22716999053955, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0234375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1532.0, | |
| "completions/mean_length": 624.4296875, | |
| "completions/mean_terminated_length": 602.552001953125, | |
| "completions/min_length": 178.0, | |
| "completions/min_terminated_length": 178.0, | |
| "epoch": 0.08132530120481928, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2963525652885437, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0347, | |
| "num_tokens": 34802345.0, | |
| "reward": 20.850997924804688, | |
| "reward_std": 3.9396917819976807, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.850997924804688, | |
| "rewards/skywork_reward/std": 7.685678958892822, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1478.0, | |
| "completions/max_terminated_length": 1478.0, | |
| "completions/mean_length": 587.759765625, | |
| "completions/mean_terminated_length": 587.759765625, | |
| "completions/min_length": 174.0, | |
| "completions/min_terminated_length": 174.0, | |
| "epoch": 0.08207831325301204, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3052363991737366, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0248, | |
| "num_tokens": 35147342.0, | |
| "reward": 19.248363494873047, | |
| "reward_std": 3.9026362895965576, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.248363494873047, | |
| "rewards/skywork_reward/std": 7.5164618492126465, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1386.0, | |
| "completions/max_terminated_length": 1386.0, | |
| "completions/mean_length": 592.96875, | |
| "completions/mean_terminated_length": 592.96875, | |
| "completions/min_length": 180.0, | |
| "completions/min_terminated_length": 180.0, | |
| "epoch": 0.08283132530120482, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2893350124359131, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0428, | |
| "num_tokens": 35498750.0, | |
| "reward": 20.73992919921875, | |
| "reward_std": 4.127569675445557, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.73992919921875, | |
| "rewards/skywork_reward/std": 7.064544677734375, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1275.0, | |
| "completions/max_terminated_length": 1275.0, | |
| "completions/mean_length": 580.20703125, | |
| "completions/mean_terminated_length": 580.20703125, | |
| "completions/min_length": 217.0, | |
| "completions/min_terminated_length": 217.0, | |
| "epoch": 0.08358433734939759, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2851979732513428, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0143, | |
| "num_tokens": 35838488.0, | |
| "reward": 22.53789520263672, | |
| "reward_std": 3.5906736850738525, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.53789520263672, | |
| "rewards/skywork_reward/std": 7.760654449462891, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1381.0, | |
| "completions/mean_length": 562.802734375, | |
| "completions/mean_terminated_length": 560.8982543945312, | |
| "completions/min_length": 204.0, | |
| "completions/min_terminated_length": 204.0, | |
| "epoch": 0.08433734939759036, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3198689818382263, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0329, | |
| "num_tokens": 36174899.0, | |
| "reward": 18.075515747070312, | |
| "reward_std": 3.7063498497009277, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.075515747070312, | |
| "rewards/skywork_reward/std": 8.073354721069336, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1242.0, | |
| "completions/mean_length": 566.4921875, | |
| "completions/mean_terminated_length": 564.5949096679688, | |
| "completions/min_length": 156.0, | |
| "completions/min_terminated_length": 156.0, | |
| "epoch": 0.08509036144578314, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2963120639324188, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0291, | |
| "num_tokens": 36510591.0, | |
| "reward": 19.600627899169922, | |
| "reward_std": 3.769819736480713, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.600627899169922, | |
| "rewards/skywork_reward/std": 8.610282897949219, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1418.0, | |
| "completions/mean_length": 620.0, | |
| "completions/mean_terminated_length": 618.2074584960938, | |
| "completions/min_length": 196.0, | |
| "completions/min_terminated_length": 196.0, | |
| "epoch": 0.0858433734939759, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.28681400418281555, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0203, | |
| "num_tokens": 36873903.0, | |
| "reward": 19.95730209350586, | |
| "reward_std": 3.9464974403381348, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.95730209350586, | |
| "rewards/skywork_reward/std": 7.085601806640625, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1423.0, | |
| "completions/mean_length": 568.66796875, | |
| "completions/mean_terminated_length": 566.7749633789062, | |
| "completions/min_length": 199.0, | |
| "completions/min_terminated_length": 199.0, | |
| "epoch": 0.08659638554216867, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3024079501628876, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0374, | |
| "num_tokens": 37214469.0, | |
| "reward": 20.049148559570312, | |
| "reward_std": 3.8342490196228027, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.049148559570312, | |
| "rewards/skywork_reward/std": 7.704470634460449, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1490.0, | |
| "completions/mean_length": 600.234375, | |
| "completions/mean_terminated_length": 596.5647583007812, | |
| "completions/min_length": 184.0, | |
| "completions/min_terminated_length": 184.0, | |
| "epoch": 0.08734939759036145, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3003012239933014, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0454, | |
| "num_tokens": 37569709.0, | |
| "reward": 19.68072509765625, | |
| "reward_std": 4.329319953918457, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.68072509765625, | |
| "rewards/skywork_reward/std": 7.497686386108398, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1498.0, | |
| "completions/mean_length": 607.294921875, | |
| "completions/mean_terminated_length": 605.4774780273438, | |
| "completions/min_length": 178.0, | |
| "completions/min_terminated_length": 178.0, | |
| "epoch": 0.08810240963855422, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.27138060331344604, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0173, | |
| "num_tokens": 37926180.0, | |
| "reward": 22.437545776367188, | |
| "reward_std": 3.5447933673858643, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.437545776367188, | |
| "rewards/skywork_reward/std": 6.564459323883057, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1526.0, | |
| "completions/max_terminated_length": 1526.0, | |
| "completions/mean_length": 588.505859375, | |
| "completions/mean_terminated_length": 588.505859375, | |
| "completions/min_length": 164.0, | |
| "completions/min_terminated_length": 164.0, | |
| "epoch": 0.08885542168674698, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.27087512612342834, | |
| "learning_rate": 1e-06, | |
| "loss": 0.031, | |
| "num_tokens": 38270599.0, | |
| "reward": 21.066497802734375, | |
| "reward_std": 3.951112985610962, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.066497802734375, | |
| "rewards/skywork_reward/std": 7.548466205596924, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1512.0, | |
| "completions/max_terminated_length": 1512.0, | |
| "completions/mean_length": 568.57421875, | |
| "completions/mean_terminated_length": 568.57421875, | |
| "completions/min_length": 228.0, | |
| "completions/min_terminated_length": 228.0, | |
| "epoch": 0.08960843373493976, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2935068607330322, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0364, | |
| "num_tokens": 38607181.0, | |
| "reward": 18.969436645507812, | |
| "reward_std": 3.821528434753418, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 18.969436645507812, | |
| "rewards/skywork_reward/std": 8.197724342346191, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1475.0, | |
| "completions/mean_length": 571.787109375, | |
| "completions/mean_terminated_length": 566.1041259765625, | |
| "completions/min_length": 159.0, | |
| "completions/min_terminated_length": 159.0, | |
| "epoch": 0.09036144578313253, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.32413604855537415, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0357, | |
| "num_tokens": 38945056.0, | |
| "reward": 19.515613555908203, | |
| "reward_std": 3.548654556274414, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.515613555908203, | |
| "rewards/skywork_reward/std": 7.051810264587402, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1199.0, | |
| "completions/max_terminated_length": 1199.0, | |
| "completions/mean_length": 541.04296875, | |
| "completions/mean_terminated_length": 541.04296875, | |
| "completions/min_length": 117.0, | |
| "completions/min_terminated_length": 117.0, | |
| "epoch": 0.0911144578313253, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.33912891149520874, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0302, | |
| "num_tokens": 39266406.0, | |
| "reward": 21.350250244140625, | |
| "reward_std": 3.481703758239746, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.350250244140625, | |
| "rewards/skywork_reward/std": 7.860495567321777, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.013671875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1444.0, | |
| "completions/mean_length": 608.25390625, | |
| "completions/mean_terminated_length": 595.3941040039062, | |
| "completions/min_length": 164.0, | |
| "completions/min_terminated_length": 164.0, | |
| "epoch": 0.09186746987951808, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.27568143606185913, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0365, | |
| "num_tokens": 39626184.0, | |
| "reward": 20.61075210571289, | |
| "reward_std": 3.709376335144043, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.61075210571289, | |
| "rewards/skywork_reward/std": 7.836774826049805, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1520.0, | |
| "completions/mean_length": 671.375, | |
| "completions/mean_terminated_length": 664.5669555664062, | |
| "completions/min_length": 231.0, | |
| "completions/min_terminated_length": 231.0, | |
| "epoch": 0.09262048192771084, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2713894844055176, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0227, | |
| "num_tokens": 40013720.0, | |
| "reward": 19.10358428955078, | |
| "reward_std": 3.289210081100464, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.10358428955078, | |
| "rewards/skywork_reward/std": 8.462961196899414, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1436.0, | |
| "completions/mean_length": 618.333984375, | |
| "completions/mean_terminated_length": 614.7353515625, | |
| "completions/min_length": 162.0, | |
| "completions/min_terminated_length": 162.0, | |
| "epoch": 0.09337349397590361, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.28417181968688965, | |
| "learning_rate": 1e-06, | |
| "loss": 0.045, | |
| "num_tokens": 40377171.0, | |
| "reward": 19.55655288696289, | |
| "reward_std": 4.263092994689941, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.55655288696289, | |
| "rewards/skywork_reward/std": 7.342254161834717, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1383.0, | |
| "completions/max_terminated_length": 1383.0, | |
| "completions/mean_length": 596.92578125, | |
| "completions/mean_terminated_length": 596.92578125, | |
| "completions/min_length": 236.0, | |
| "completions/min_terminated_length": 236.0, | |
| "epoch": 0.09412650602409639, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.28085947036743164, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0195, | |
| "num_tokens": 40728669.0, | |
| "reward": 21.083993911743164, | |
| "reward_std": 3.448134422302246, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.083993911743164, | |
| "rewards/skywork_reward/std": 6.892836570739746, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1493.0, | |
| "completions/mean_length": 575.734375, | |
| "completions/mean_terminated_length": 573.8551635742188, | |
| "completions/min_length": 145.0, | |
| "completions/min_terminated_length": 145.0, | |
| "epoch": 0.09487951807228916, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.30826082825660706, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0245, | |
| "num_tokens": 41069797.0, | |
| "reward": 21.492095947265625, | |
| "reward_std": 3.321489095687866, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.492095947265625, | |
| "rewards/skywork_reward/std": 7.797940254211426, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1433.0, | |
| "completions/mean_length": 586.591796875, | |
| "completions/mean_terminated_length": 580.99609375, | |
| "completions/min_length": 218.0, | |
| "completions/min_terminated_length": 218.0, | |
| "epoch": 0.09563253012048192, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.31485578417778015, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0292, | |
| "num_tokens": 41417188.0, | |
| "reward": 19.127901077270508, | |
| "reward_std": 3.9224164485931396, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.127901077270508, | |
| "rewards/skywork_reward/std": 7.79984712600708, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1366.0, | |
| "completions/max_terminated_length": 1366.0, | |
| "completions/mean_length": 515.953125, | |
| "completions/mean_terminated_length": 515.953125, | |
| "completions/min_length": 174.0, | |
| "completions/min_terminated_length": 174.0, | |
| "epoch": 0.0963855421686747, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3710417151451111, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0218, | |
| "num_tokens": 41725820.0, | |
| "reward": 21.463088989257812, | |
| "reward_std": 3.2953317165374756, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.463088989257812, | |
| "rewards/skywork_reward/std": 7.579995155334473, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1239.0, | |
| "completions/mean_length": 565.0234375, | |
| "completions/mean_terminated_length": 563.123291015625, | |
| "completions/min_length": 183.0, | |
| "completions/min_terminated_length": 183.0, | |
| "epoch": 0.09713855421686747, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3211367428302765, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0347, | |
| "num_tokens": 42057704.0, | |
| "reward": 21.086503982543945, | |
| "reward_std": 3.46575927734375, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.086503982543945, | |
| "rewards/skywork_reward/std": 7.8865861892700195, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1267.0, | |
| "completions/mean_length": 560.736328125, | |
| "completions/mean_terminated_length": 553.0570678710938, | |
| "completions/min_length": 179.0, | |
| "completions/min_terminated_length": 179.0, | |
| "epoch": 0.09789156626506024, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4249287545681, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0553, | |
| "num_tokens": 42388433.0, | |
| "reward": 19.455886840820312, | |
| "reward_std": 3.8543548583984375, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.455886840820312, | |
| "rewards/skywork_reward/std": 7.918321132659912, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1415.0, | |
| "completions/max_terminated_length": 1415.0, | |
| "completions/mean_length": 542.6484375, | |
| "completions/mean_terminated_length": 542.6484375, | |
| "completions/min_length": 153.0, | |
| "completions/min_terminated_length": 153.0, | |
| "epoch": 0.09864457831325302, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3672284483909607, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0236, | |
| "num_tokens": 42711533.0, | |
| "reward": 21.042701721191406, | |
| "reward_std": 4.084582805633545, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.042701721191406, | |
| "rewards/skywork_reward/std": 8.580524444580078, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1478.0, | |
| "completions/mean_length": 606.06640625, | |
| "completions/mean_terminated_length": 600.5855102539062, | |
| "completions/min_length": 174.0, | |
| "completions/min_terminated_length": 174.0, | |
| "epoch": 0.09939759036144578, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3537883758544922, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0434, | |
| "num_tokens": 43065375.0, | |
| "reward": 22.385543823242188, | |
| "reward_std": 4.083456516265869, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.385543823242188, | |
| "rewards/skywork_reward/std": 7.7553229331970215, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1357.0, | |
| "completions/max_terminated_length": 1357.0, | |
| "completions/mean_length": 638.109375, | |
| "completions/mean_terminated_length": 638.109375, | |
| "completions/min_length": 209.0, | |
| "completions/min_terminated_length": 209.0, | |
| "epoch": 0.10015060240963855, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.31206345558166504, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0194, | |
| "num_tokens": 43435015.0, | |
| "reward": 23.201324462890625, | |
| "reward_std": 3.3933279514312744, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.201324462890625, | |
| "rewards/skywork_reward/std": 8.763078689575195, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1519.0, | |
| "completions/mean_length": 683.912109375, | |
| "completions/mean_terminated_length": 678.8900146484375, | |
| "completions/min_length": 240.0, | |
| "completions/min_terminated_length": 240.0, | |
| "epoch": 0.10090361445783133, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3065231144428253, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0145, | |
| "num_tokens": 43832490.0, | |
| "reward": 21.31167221069336, | |
| "reward_std": 3.8207640647888184, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.31167221069336, | |
| "rewards/skywork_reward/std": 6.737093448638916, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1257.0, | |
| "completions/max_terminated_length": 1257.0, | |
| "completions/mean_length": 599.5703125, | |
| "completions/mean_terminated_length": 599.5703125, | |
| "completions/min_length": 253.0, | |
| "completions/min_terminated_length": 253.0, | |
| "epoch": 0.1016566265060241, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3484726548194885, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0294, | |
| "num_tokens": 44187758.0, | |
| "reward": 22.86431884765625, | |
| "reward_std": 3.3447577953338623, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.86431884765625, | |
| "rewards/skywork_reward/std": 7.047182083129883, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1471.0, | |
| "completions/max_terminated_length": 1471.0, | |
| "completions/mean_length": 613.40625, | |
| "completions/mean_terminated_length": 613.40625, | |
| "completions/min_length": 218.0, | |
| "completions/min_terminated_length": 218.0, | |
| "epoch": 0.10240963855421686, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3716602623462677, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0281, | |
| "num_tokens": 44547310.0, | |
| "reward": 22.30841064453125, | |
| "reward_std": 3.4224910736083984, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.30841064453125, | |
| "rewards/skywork_reward/std": 7.069167137145996, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1517.0, | |
| "completions/mean_length": 641.091796875, | |
| "completions/mean_terminated_length": 635.8173217773438, | |
| "completions/min_length": 190.0, | |
| "completions/min_terminated_length": 190.0, | |
| "epoch": 0.10316265060240964, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.34256511926651, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0295, | |
| "num_tokens": 44925341.0, | |
| "reward": 19.721071243286133, | |
| "reward_std": 3.7587203979492188, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.721071243286133, | |
| "rewards/skywork_reward/std": 7.847194194793701, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1499.0, | |
| "completions/max_terminated_length": 1499.0, | |
| "completions/mean_length": 639.66796875, | |
| "completions/mean_terminated_length": 639.66796875, | |
| "completions/min_length": 230.0, | |
| "completions/min_terminated_length": 230.0, | |
| "epoch": 0.10391566265060241, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.33065295219421387, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0388, | |
| "num_tokens": 45295539.0, | |
| "reward": 21.96673583984375, | |
| "reward_std": 3.900968074798584, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.96673583984375, | |
| "rewards/skywork_reward/std": 7.551266670227051, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1523.0, | |
| "completions/max_terminated_length": 1523.0, | |
| "completions/mean_length": 610.98828125, | |
| "completions/mean_terminated_length": 610.98828125, | |
| "completions/min_length": 246.0, | |
| "completions/min_terminated_length": 246.0, | |
| "epoch": 0.10466867469879518, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3167376220226288, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0114, | |
| "num_tokens": 45658877.0, | |
| "reward": 19.854530334472656, | |
| "reward_std": 4.059847354888916, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.854530334472656, | |
| "rewards/skywork_reward/std": 6.8408002853393555, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1457.0, | |
| "completions/mean_length": 653.609375, | |
| "completions/mean_terminated_length": 651.882568359375, | |
| "completions/min_length": 230.0, | |
| "completions/min_terminated_length": 230.0, | |
| "epoch": 0.10542168674698796, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.29037731885910034, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0306, | |
| "num_tokens": 46037205.0, | |
| "reward": 20.99395751953125, | |
| "reward_std": 3.6438076496124268, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.99395751953125, | |
| "rewards/skywork_reward/std": 6.79690408706665, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1523.0, | |
| "completions/mean_length": 633.763671875, | |
| "completions/mean_terminated_length": 631.998046875, | |
| "completions/min_length": 190.0, | |
| "completions/min_terminated_length": 190.0, | |
| "epoch": 0.10617469879518072, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2938140332698822, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0272, | |
| "num_tokens": 46405644.0, | |
| "reward": 21.723541259765625, | |
| "reward_std": 4.110295295715332, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.723541259765625, | |
| "rewards/skywork_reward/std": 6.7619757652282715, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.009765625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1354.0, | |
| "completions/mean_length": 589.501953125, | |
| "completions/mean_terminated_length": 580.1676635742188, | |
| "completions/min_length": 107.0, | |
| "completions/min_terminated_length": 107.0, | |
| "epoch": 0.10692771084337349, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3247605264186859, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0524, | |
| "num_tokens": 46749773.0, | |
| "reward": 19.817337036132812, | |
| "reward_std": 3.662755012512207, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.817337036132812, | |
| "rewards/skywork_reward/std": 7.711267948150635, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1359.0, | |
| "completions/max_terminated_length": 1359.0, | |
| "completions/mean_length": 625.251953125, | |
| "completions/mean_terminated_length": 625.251953125, | |
| "completions/min_length": 157.0, | |
| "completions/min_terminated_length": 157.0, | |
| "epoch": 0.10768072289156627, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3041718900203705, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0231, | |
| "num_tokens": 47112942.0, | |
| "reward": 21.378835678100586, | |
| "reward_std": 3.913424491882324, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.378835678100586, | |
| "rewards/skywork_reward/std": 7.104243755340576, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.017578125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1508.0, | |
| "completions/mean_length": 646.98828125, | |
| "completions/mean_terminated_length": 631.0814819335938, | |
| "completions/min_length": 195.0, | |
| "completions/min_terminated_length": 195.0, | |
| "epoch": 0.10843373493975904, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2911466062068939, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0378, | |
| "num_tokens": 47491000.0, | |
| "reward": 21.82143211364746, | |
| "reward_std": 3.859783887863159, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.82143211364746, | |
| "rewards/skywork_reward/std": 9.108434677124023, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1434.0, | |
| "completions/mean_length": 588.384765625, | |
| "completions/mean_terminated_length": 586.5303344726562, | |
| "completions/min_length": 197.0, | |
| "completions/min_terminated_length": 197.0, | |
| "epoch": 0.1091867469879518, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.30668869614601135, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0326, | |
| "num_tokens": 47839837.0, | |
| "reward": 21.99334716796875, | |
| "reward_std": 3.588073968887329, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.99334716796875, | |
| "rewards/skywork_reward/std": 8.704425811767578, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1261.0, | |
| "completions/mean_length": 647.630859375, | |
| "completions/mean_terminated_length": 645.892333984375, | |
| "completions/min_length": 271.0, | |
| "completions/min_terminated_length": 271.0, | |
| "epoch": 0.10993975903614457, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.29260244965553284, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0329, | |
| "num_tokens": 48214544.0, | |
| "reward": 21.543075561523438, | |
| "reward_std": 3.8497776985168457, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.543075561523438, | |
| "rewards/skywork_reward/std": 8.165260314941406, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1532.0, | |
| "completions/mean_length": 726.556640625, | |
| "completions/mean_terminated_length": 720.1830444335938, | |
| "completions/min_length": 272.0, | |
| "completions/min_terminated_length": 272.0, | |
| "epoch": 0.11069277108433735, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.27006274461746216, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0695, | |
| "num_tokens": 48635277.0, | |
| "reward": 19.606515884399414, | |
| "reward_std": 4.228018760681152, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.606515884399414, | |
| "rewards/skywork_reward/std": 6.9952826499938965, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1506.0, | |
| "completions/mean_length": 619.673828125, | |
| "completions/mean_terminated_length": 614.2731323242188, | |
| "completions/min_length": 162.0, | |
| "completions/min_terminated_length": 162.0, | |
| "epoch": 0.11144578313253012, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2880130410194397, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0294, | |
| "num_tokens": 48994470.0, | |
| "reward": 20.1602783203125, | |
| "reward_std": 3.2607784271240234, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.1602783203125, | |
| "rewards/skywork_reward/std": 6.346843719482422, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1392.0, | |
| "completions/mean_length": 629.67578125, | |
| "completions/mean_terminated_length": 627.9021606445312, | |
| "completions/min_length": 123.0, | |
| "completions/min_terminated_length": 123.0, | |
| "epoch": 0.11219879518072289, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2898614704608917, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0405, | |
| "num_tokens": 49363600.0, | |
| "reward": 22.240509033203125, | |
| "reward_std": 3.9023704528808594, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.240509033203125, | |
| "rewards/skywork_reward/std": 7.541024208068848, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1434.0, | |
| "completions/max_terminated_length": 1434.0, | |
| "completions/mean_length": 629.8828125, | |
| "completions/mean_terminated_length": 629.8828125, | |
| "completions/min_length": 177.0, | |
| "completions/min_terminated_length": 177.0, | |
| "epoch": 0.11295180722891567, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2920468747615814, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0138, | |
| "num_tokens": 49729284.0, | |
| "reward": 21.534242630004883, | |
| "reward_std": 3.1994595527648926, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.534242630004883, | |
| "rewards/skywork_reward/std": 7.222407341003418, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.025390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1490.0, | |
| "completions/mean_length": 638.59765625, | |
| "completions/mean_terminated_length": 615.2184448242188, | |
| "completions/min_length": 222.0, | |
| "completions/min_terminated_length": 222.0, | |
| "epoch": 0.11370481927710843, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2872624397277832, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0157, | |
| "num_tokens": 50105238.0, | |
| "reward": 19.462825775146484, | |
| "reward_std": 3.6386303901672363, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.462825775146484, | |
| "rewards/skywork_reward/std": 7.454095363616943, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1464.0, | |
| "completions/mean_length": 575.103515625, | |
| "completions/mean_terminated_length": 567.5374145507812, | |
| "completions/min_length": 148.0, | |
| "completions/min_terminated_length": 148.0, | |
| "epoch": 0.1144578313253012, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3207628130912781, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0549, | |
| "num_tokens": 50444907.0, | |
| "reward": 20.47686767578125, | |
| "reward_std": 3.3109498023986816, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.47686767578125, | |
| "rewards/skywork_reward/std": 7.746679306030273, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1445.0, | |
| "completions/mean_length": 629.671875, | |
| "completions/mean_terminated_length": 626.11767578125, | |
| "completions/min_length": 100.0, | |
| "completions/min_terminated_length": 100.0, | |
| "epoch": 0.11521084337349398, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2737574875354767, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0135, | |
| "num_tokens": 50814211.0, | |
| "reward": 20.94318199157715, | |
| "reward_std": 4.387291431427002, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.94318199157715, | |
| "rewards/skywork_reward/std": 8.372903823852539, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1522.0, | |
| "completions/mean_length": 580.333984375, | |
| "completions/mean_terminated_length": 569.0020141601562, | |
| "completions/min_length": 164.0, | |
| "completions/min_terminated_length": 164.0, | |
| "epoch": 0.11596385542168675, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.32104018330574036, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0363, | |
| "num_tokens": 51156158.0, | |
| "reward": 21.5543212890625, | |
| "reward_std": 3.9272093772888184, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.5543212890625, | |
| "rewards/skywork_reward/std": 6.690492153167725, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.009765625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1456.0, | |
| "completions/mean_length": 579.384765625, | |
| "completions/mean_terminated_length": 569.95068359375, | |
| "completions/min_length": 223.0, | |
| "completions/min_terminated_length": 223.0, | |
| "epoch": 0.11671686746987951, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.30905836820602417, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0484, | |
| "num_tokens": 51498627.0, | |
| "reward": 21.084388732910156, | |
| "reward_std": 3.953840494155884, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.084388732910156, | |
| "rewards/skywork_reward/std": 8.337218284606934, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1343.0, | |
| "completions/max_terminated_length": 1343.0, | |
| "completions/mean_length": 585.482421875, | |
| "completions/mean_terminated_length": 585.482421875, | |
| "completions/min_length": 256.0, | |
| "completions/min_terminated_length": 256.0, | |
| "epoch": 0.11746987951807229, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.2910420000553131, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0174, | |
| "num_tokens": 51845034.0, | |
| "reward": 24.57891845703125, | |
| "reward_std": 3.3543262481689453, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 24.57891845703125, | |
| "rewards/skywork_reward/std": 6.263175010681152, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.025390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1421.0, | |
| "completions/mean_length": 614.875, | |
| "completions/mean_terminated_length": 590.8777465820312, | |
| "completions/min_length": 160.0, | |
| "completions/min_terminated_length": 160.0, | |
| "epoch": 0.11822289156626506, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.30173835158348083, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0252, | |
| "num_tokens": 52204538.0, | |
| "reward": 21.792831420898438, | |
| "reward_std": 2.7370691299438477, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.792831420898438, | |
| "rewards/skywork_reward/std": 7.606500148773193, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1515.0, | |
| "completions/mean_length": 575.78125, | |
| "completions/mean_terminated_length": 572.0157470703125, | |
| "completions/min_length": 225.0, | |
| "completions/min_terminated_length": 225.0, | |
| "epoch": 0.11897590361445783, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3065696656703949, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0432, | |
| "num_tokens": 52541514.0, | |
| "reward": 21.47296142578125, | |
| "reward_std": 3.3334720134735107, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.47296142578125, | |
| "rewards/skywork_reward/std": 8.22110652923584, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1460.0, | |
| "completions/max_terminated_length": 1460.0, | |
| "completions/mean_length": 588.296875, | |
| "completions/mean_terminated_length": 588.296875, | |
| "completions/min_length": 170.0, | |
| "completions/min_terminated_length": 170.0, | |
| "epoch": 0.1197289156626506, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.30373716354370117, | |
| "learning_rate": 1e-06, | |
| "loss": 0.023, | |
| "num_tokens": 52887346.0, | |
| "reward": 21.733245849609375, | |
| "reward_std": 3.1107327938079834, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.733245849609375, | |
| "rewards/skywork_reward/std": 7.991820335388184, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1281.0, | |
| "completions/mean_length": 575.61328125, | |
| "completions/mean_terminated_length": 573.7338256835938, | |
| "completions/min_length": 190.0, | |
| "completions/min_terminated_length": 190.0, | |
| "epoch": 0.12048192771084337, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3277619183063507, | |
| "learning_rate": 1e-06, | |
| "loss": 0.027, | |
| "num_tokens": 53228412.0, | |
| "reward": 19.673015594482422, | |
| "reward_std": 3.533682107925415, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 19.673015594482422, | |
| "rewards/skywork_reward/std": 7.22337532043457, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1520.0, | |
| "completions/mean_length": 553.216796875, | |
| "completions/mean_terminated_length": 551.2935180664062, | |
| "completions/min_length": 170.0, | |
| "completions/min_terminated_length": 170.0, | |
| "epoch": 0.12123493975903614, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.33466610312461853, | |
| "learning_rate": 1e-06, | |
| "loss": 0.029, | |
| "num_tokens": 53559739.0, | |
| "reward": 20.984954833984375, | |
| "reward_std": 3.4584217071533203, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.984954833984375, | |
| "rewards/skywork_reward/std": 6.437849521636963, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1488.0, | |
| "completions/mean_length": 622.49609375, | |
| "completions/mean_terminated_length": 618.9137573242188, | |
| "completions/min_length": 191.0, | |
| "completions/min_terminated_length": 191.0, | |
| "epoch": 0.12198795180722892, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3004685342311859, | |
| "learning_rate": 1e-06, | |
| "loss": 0.017, | |
| "num_tokens": 53922953.0, | |
| "reward": 22.345840454101562, | |
| "reward_std": 3.4734017848968506, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.345840454101562, | |
| "rewards/skywork_reward/std": 7.882354736328125, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1463.0, | |
| "completions/mean_length": 556.958984375, | |
| "completions/mean_terminated_length": 553.11962890625, | |
| "completions/min_length": 141.0, | |
| "completions/min_terminated_length": 141.0, | |
| "epoch": 0.12274096385542169, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.33252498507499695, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0592, | |
| "num_tokens": 54249444.0, | |
| "reward": 20.580535888671875, | |
| "reward_std": 3.609018087387085, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.580535888671875, | |
| "rewards/skywork_reward/std": 7.147970199584961, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1486.0, | |
| "completions/mean_length": 598.171875, | |
| "completions/mean_terminated_length": 596.3366088867188, | |
| "completions/min_length": 189.0, | |
| "completions/min_terminated_length": 189.0, | |
| "epoch": 0.12349397590361445, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3217158913612366, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0387, | |
| "num_tokens": 54602780.0, | |
| "reward": 20.154247283935547, | |
| "reward_std": 3.670168399810791, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.154247283935547, | |
| "rewards/skywork_reward/std": 6.9621381759643555, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1384.0, | |
| "completions/mean_length": 615.55078125, | |
| "completions/mean_terminated_length": 613.74951171875, | |
| "completions/min_length": 156.0, | |
| "completions/min_terminated_length": 156.0, | |
| "epoch": 0.12424698795180723, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.324698805809021, | |
| "learning_rate": 1e-06, | |
| "loss": 0.062, | |
| "num_tokens": 54967254.0, | |
| "reward": 22.460899353027344, | |
| "reward_std": 4.602289199829102, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.460899353027344, | |
| "rewards/skywork_reward/std": 7.923734664916992, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1451.0, | |
| "completions/mean_length": 608.3203125, | |
| "completions/mean_terminated_length": 606.5048828125, | |
| "completions/min_length": 256.0, | |
| "completions/min_terminated_length": 256.0, | |
| "epoch": 0.125, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3245730400085449, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0314, | |
| "num_tokens": 55327114.0, | |
| "reward": 20.21398162841797, | |
| "reward_std": 3.487682342529297, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.21398162841797, | |
| "rewards/skywork_reward/std": 7.384416103363037, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1434.0, | |
| "completions/mean_length": 548.36328125, | |
| "completions/mean_terminated_length": 546.4305419921875, | |
| "completions/min_length": 76.0, | |
| "completions/min_terminated_length": 76.0, | |
| "epoch": 0.12575301204819278, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.35269981622695923, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0122, | |
| "num_tokens": 55651652.0, | |
| "reward": 22.794578552246094, | |
| "reward_std": 3.213481903076172, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.794578552246094, | |
| "rewards/skywork_reward/std": 7.596462726593018, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1524.0, | |
| "completions/mean_length": 584.244140625, | |
| "completions/mean_terminated_length": 576.75, | |
| "completions/min_length": 207.0, | |
| "completions/min_terminated_length": 207.0, | |
| "epoch": 0.12650602409638553, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.33272233605384827, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0165, | |
| "num_tokens": 55995473.0, | |
| "reward": 21.558271408081055, | |
| "reward_std": 3.5698699951171875, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.558271408081055, | |
| "rewards/skywork_reward/std": 7.89260721206665, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1353.0, | |
| "completions/max_terminated_length": 1353.0, | |
| "completions/mean_length": 607.416015625, | |
| "completions/mean_terminated_length": 607.416015625, | |
| "completions/min_length": 263.0, | |
| "completions/min_terminated_length": 263.0, | |
| "epoch": 0.1272590361445783, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.35381069779396057, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0291, | |
| "num_tokens": 56355750.0, | |
| "reward": 21.860408782958984, | |
| "reward_std": 4.002436637878418, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.860408782958984, | |
| "rewards/skywork_reward/std": 6.837855339050293, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1370.0, | |
| "completions/mean_length": 570.771484375, | |
| "completions/mean_terminated_length": 568.882568359375, | |
| "completions/min_length": 168.0, | |
| "completions/min_terminated_length": 168.0, | |
| "epoch": 0.1280120481927711, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.34828981757164, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0188, | |
| "num_tokens": 56692929.0, | |
| "reward": 20.509864807128906, | |
| "reward_std": 3.7775073051452637, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.509864807128906, | |
| "rewards/skywork_reward/std": 7.960306167602539, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1454.0, | |
| "completions/mean_length": 624.5546875, | |
| "completions/mean_terminated_length": 622.7710571289062, | |
| "completions/min_length": 182.0, | |
| "completions/min_terminated_length": 182.0, | |
| "epoch": 0.12876506024096385, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3327150046825409, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0299, | |
| "num_tokens": 57063869.0, | |
| "reward": 22.369293212890625, | |
| "reward_std": 3.6728134155273438, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.369293212890625, | |
| "rewards/skywork_reward/std": 5.960691928863525, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1187.0, | |
| "completions/max_terminated_length": 1187.0, | |
| "completions/mean_length": 547.8203125, | |
| "completions/mean_terminated_length": 547.8203125, | |
| "completions/min_length": 170.0, | |
| "completions/min_terminated_length": 170.0, | |
| "epoch": 0.12951807228915663, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.38631579279899597, | |
| "learning_rate": 1e-06, | |
| "loss": 0.013, | |
| "num_tokens": 57390065.0, | |
| "reward": 21.2592716217041, | |
| "reward_std": 3.4279208183288574, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.2592716217041, | |
| "rewards/skywork_reward/std": 7.6550774574279785, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1250.0, | |
| "completions/max_terminated_length": 1250.0, | |
| "completions/mean_length": 591.0859375, | |
| "completions/mean_terminated_length": 591.0859375, | |
| "completions/min_length": 219.0, | |
| "completions/min_terminated_length": 219.0, | |
| "epoch": 0.1302710843373494, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3411237299442291, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0128, | |
| "num_tokens": 57743421.0, | |
| "reward": 21.829246520996094, | |
| "reward_std": 3.742777109146118, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.829246520996094, | |
| "rewards/skywork_reward/std": 7.164978981018066, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1455.0, | |
| "completions/mean_length": 577.67578125, | |
| "completions/mean_terminated_length": 575.8004150390625, | |
| "completions/min_length": 207.0, | |
| "completions/min_terminated_length": 207.0, | |
| "epoch": 0.13102409638554216, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.34287434816360474, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0272, | |
| "num_tokens": 58083831.0, | |
| "reward": 22.2662353515625, | |
| "reward_std": 3.2713894844055176, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.2662353515625, | |
| "rewards/skywork_reward/std": 7.178630352020264, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1258.0, | |
| "completions/mean_length": 565.248046875, | |
| "completions/mean_terminated_length": 561.4412231445312, | |
| "completions/min_length": 202.0, | |
| "completions/min_terminated_length": 202.0, | |
| "epoch": 0.13177710843373494, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.35165417194366455, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0374, | |
| "num_tokens": 58418918.0, | |
| "reward": 22.354202270507812, | |
| "reward_std": 4.054803848266602, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.354202270507812, | |
| "rewards/skywork_reward/std": 7.76880407333374, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1072.0, | |
| "completions/max_terminated_length": 1072.0, | |
| "completions/mean_length": 513.833984375, | |
| "completions/mean_terminated_length": 513.833984375, | |
| "completions/min_length": 147.0, | |
| "completions/min_terminated_length": 147.0, | |
| "epoch": 0.13253012048192772, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3769795298576355, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0094, | |
| "num_tokens": 58727089.0, | |
| "reward": 24.008331298828125, | |
| "reward_std": 3.7061939239501953, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 24.008331298828125, | |
| "rewards/skywork_reward/std": 7.291410446166992, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1463.0, | |
| "completions/mean_length": 594.93359375, | |
| "completions/mean_terminated_length": 593.0919799804688, | |
| "completions/min_length": 238.0, | |
| "completions/min_terminated_length": 238.0, | |
| "epoch": 0.13328313253012047, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3345366418361664, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0389, | |
| "num_tokens": 59079759.0, | |
| "reward": 22.873056411743164, | |
| "reward_std": 3.8607959747314453, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.873056411743164, | |
| "rewards/skywork_reward/std": 6.990558624267578, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1447.0, | |
| "completions/max_terminated_length": 1447.0, | |
| "completions/mean_length": 588.767578125, | |
| "completions/mean_terminated_length": 588.767578125, | |
| "completions/min_length": 208.0, | |
| "completions/min_terminated_length": 208.0, | |
| "epoch": 0.13403614457831325, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.34772616624832153, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0202, | |
| "num_tokens": 59424152.0, | |
| "reward": 20.871658325195312, | |
| "reward_std": 3.6873836517333984, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.871658325195312, | |
| "rewards/skywork_reward/std": 7.474151611328125, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1480.0, | |
| "completions/mean_length": 597.91015625, | |
| "completions/mean_terminated_length": 592.3811645507812, | |
| "completions/min_length": 244.0, | |
| "completions/min_terminated_length": 244.0, | |
| "epoch": 0.13478915662650603, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3342570662498474, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0287, | |
| "num_tokens": 59775834.0, | |
| "reward": 22.359867095947266, | |
| "reward_std": 3.218701124191284, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.359867095947266, | |
| "rewards/skywork_reward/std": 8.73936939239502, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1486.0, | |
| "completions/max_terminated_length": 1486.0, | |
| "completions/mean_length": 583.130859375, | |
| "completions/mean_terminated_length": 583.130859375, | |
| "completions/min_length": 219.0, | |
| "completions/min_terminated_length": 219.0, | |
| "epoch": 0.1355421686746988, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.32025235891342163, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0153, | |
| "num_tokens": 60119293.0, | |
| "reward": 21.634063720703125, | |
| "reward_std": 3.2388057708740234, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.634063720703125, | |
| "rewards/skywork_reward/std": 6.3536248207092285, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1263.0, | |
| "completions/mean_length": 599.6484375, | |
| "completions/mean_terminated_length": 597.8160400390625, | |
| "completions/min_length": 205.0, | |
| "completions/min_terminated_length": 205.0, | |
| "epoch": 0.13629518072289157, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3474103808403015, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0315, | |
| "num_tokens": 60467305.0, | |
| "reward": 22.63400650024414, | |
| "reward_std": 3.6428089141845703, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.63400650024414, | |
| "rewards/skywork_reward/std": 8.048898696899414, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1180.0, | |
| "completions/max_terminated_length": 1180.0, | |
| "completions/mean_length": 602.431640625, | |
| "completions/mean_terminated_length": 602.431640625, | |
| "completions/min_length": 287.0, | |
| "completions/min_terminated_length": 287.0, | |
| "epoch": 0.13704819277108435, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3456987142562866, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0283, | |
| "num_tokens": 60822518.0, | |
| "reward": 22.980743408203125, | |
| "reward_std": 3.3244166374206543, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.980743408203125, | |
| "rewards/skywork_reward/std": 7.1537909507751465, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1241.0, | |
| "completions/max_terminated_length": 1241.0, | |
| "completions/mean_length": 617.828125, | |
| "completions/mean_terminated_length": 617.828125, | |
| "completions/min_length": 253.0, | |
| "completions/min_terminated_length": 253.0, | |
| "epoch": 0.1378012048192771, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.32726573944091797, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0093, | |
| "num_tokens": 61183902.0, | |
| "reward": 23.039154052734375, | |
| "reward_std": 2.817411422729492, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.039154052734375, | |
| "rewards/skywork_reward/std": 6.791592597961426, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1528.0, | |
| "completions/mean_length": 601.18359375, | |
| "completions/mean_terminated_length": 599.3541870117188, | |
| "completions/min_length": 246.0, | |
| "completions/min_terminated_length": 246.0, | |
| "epoch": 0.13855421686746988, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3330095112323761, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0182, | |
| "num_tokens": 61539884.0, | |
| "reward": 23.63543701171875, | |
| "reward_std": 3.1759846210479736, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.63543701171875, | |
| "rewards/skywork_reward/std": 7.07273006439209, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1424.0, | |
| "completions/max_terminated_length": 1424.0, | |
| "completions/mean_length": 651.955078125, | |
| "completions/mean_terminated_length": 651.955078125, | |
| "completions/min_length": 265.0, | |
| "completions/min_terminated_length": 265.0, | |
| "epoch": 0.13930722891566266, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.39070987701416016, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0259, | |
| "num_tokens": 61919077.0, | |
| "reward": 21.221710205078125, | |
| "reward_std": 3.5847105979919434, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.221710205078125, | |
| "rewards/skywork_reward/std": 7.764612197875977, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1329.0, | |
| "completions/max_terminated_length": 1329.0, | |
| "completions/mean_length": 621.73828125, | |
| "completions/mean_terminated_length": 621.73828125, | |
| "completions/min_length": 227.0, | |
| "completions/min_terminated_length": 227.0, | |
| "epoch": 0.14006024096385541, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.36137887835502625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0158, | |
| "num_tokens": 62282095.0, | |
| "reward": 22.08428955078125, | |
| "reward_std": 3.342141628265381, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.08428955078125, | |
| "rewards/skywork_reward/std": 6.694947719573975, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1294.0, | |
| "completions/max_terminated_length": 1294.0, | |
| "completions/mean_length": 649.474609375, | |
| "completions/mean_terminated_length": 649.474609375, | |
| "completions/min_length": 264.0, | |
| "completions/min_terminated_length": 264.0, | |
| "epoch": 0.1408132530120482, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3418244421482086, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0059, | |
| "num_tokens": 62658690.0, | |
| "reward": 23.334083557128906, | |
| "reward_std": 3.1341238021850586, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.334083557128906, | |
| "rewards/skywork_reward/std": 6.964704513549805, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1408.0, | |
| "completions/max_terminated_length": 1408.0, | |
| "completions/mean_length": 609.8046875, | |
| "completions/mean_terminated_length": 609.8046875, | |
| "completions/min_length": 249.0, | |
| "completions/min_terminated_length": 249.0, | |
| "epoch": 0.14156626506024098, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3598421812057495, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0167, | |
| "num_tokens": 63015342.0, | |
| "reward": 23.847320556640625, | |
| "reward_std": 3.479365825653076, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.847320556640625, | |
| "rewards/skywork_reward/std": 7.224637508392334, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1336.0, | |
| "completions/mean_length": 631.234375, | |
| "completions/mean_terminated_length": 625.9017944335938, | |
| "completions/min_length": 203.0, | |
| "completions/min_terminated_length": 203.0, | |
| "epoch": 0.14231927710843373, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3684713542461395, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0391, | |
| "num_tokens": 63385654.0, | |
| "reward": 20.486244201660156, | |
| "reward_std": 3.928035020828247, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 20.486244201660156, | |
| "rewards/skywork_reward/std": 7.389331817626953, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1231.0, | |
| "completions/max_terminated_length": 1231.0, | |
| "completions/mean_length": 559.046875, | |
| "completions/mean_terminated_length": 559.046875, | |
| "completions/min_length": 177.0, | |
| "completions/min_terminated_length": 177.0, | |
| "epoch": 0.1430722891566265, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.43111714720726013, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0227, | |
| "num_tokens": 63715182.0, | |
| "reward": 24.990936279296875, | |
| "reward_std": 3.196828842163086, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 24.990936279296875, | |
| "rewards/skywork_reward/std": 6.8701629638671875, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1423.0, | |
| "completions/max_terminated_length": 1423.0, | |
| "completions/mean_length": 673.578125, | |
| "completions/mean_terminated_length": 673.578125, | |
| "completions/min_length": 270.0, | |
| "completions/min_terminated_length": 270.0, | |
| "epoch": 0.1438253012048193, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.33778226375579834, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0162, | |
| "num_tokens": 64108118.0, | |
| "reward": 22.113998413085938, | |
| "reward_std": 3.290834903717041, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.113998413085938, | |
| "rewards/skywork_reward/std": 7.376873970031738, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.013671875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1530.0, | |
| "completions/mean_length": 710.322265625, | |
| "completions/mean_terminated_length": 698.8772583007812, | |
| "completions/min_length": 360.0, | |
| "completions/min_terminated_length": 360.0, | |
| "epoch": 0.14457831325301204, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.33378493785858154, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0461, | |
| "num_tokens": 64517355.0, | |
| "reward": 22.64098358154297, | |
| "reward_std": 3.9250552654266357, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.64098358154297, | |
| "rewards/skywork_reward/std": 7.483473777770996, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1432.0, | |
| "completions/max_terminated_length": 1432.0, | |
| "completions/mean_length": 609.9453125, | |
| "completions/mean_terminated_length": 609.9453125, | |
| "completions/min_length": 253.0, | |
| "completions/min_terminated_length": 253.0, | |
| "epoch": 0.14533132530120482, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3949362337589264, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0283, | |
| "num_tokens": 64875503.0, | |
| "reward": 22.26373291015625, | |
| "reward_std": 3.6811184883117676, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.26373291015625, | |
| "rewards/skywork_reward/std": 7.222413539886475, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1306.0, | |
| "completions/max_terminated_length": 1306.0, | |
| "completions/mean_length": 611.69140625, | |
| "completions/mean_terminated_length": 611.69140625, | |
| "completions/min_length": 194.0, | |
| "completions/min_terminated_length": 194.0, | |
| "epoch": 0.1460843373493976, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.37980496883392334, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0178, | |
| "num_tokens": 65240289.0, | |
| "reward": 23.238971710205078, | |
| "reward_std": 3.542300224304199, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.238971710205078, | |
| "rewards/skywork_reward/std": 7.373708724975586, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1278.0, | |
| "completions/mean_length": 675.423828125, | |
| "completions/mean_terminated_length": 672.049072265625, | |
| "completions/min_length": 333.0, | |
| "completions/min_terminated_length": 333.0, | |
| "epoch": 0.14683734939759036, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 10.594128608703613, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0214, | |
| "num_tokens": 65631722.0, | |
| "reward": 22.663818359375, | |
| "reward_std": 3.3215575218200684, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 22.663818359375, | |
| "rewards/skywork_reward/std": 7.0012664794921875, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1510.0, | |
| "completions/mean_length": 643.921875, | |
| "completions/mean_terminated_length": 636.8976440429688, | |
| "completions/min_length": 270.0, | |
| "completions/min_terminated_length": 270.0, | |
| "epoch": 0.14759036144578314, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.38080358505249023, | |
| "learning_rate": 1e-06, | |
| "loss": 0.025, | |
| "num_tokens": 66005026.0, | |
| "reward": 23.194847106933594, | |
| "reward_std": 3.9582090377807617, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.194847106933594, | |
| "rewards/skywork_reward/std": 6.189321041107178, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1524.0, | |
| "completions/max_terminated_length": 1524.0, | |
| "completions/mean_length": 705.38671875, | |
| "completions/mean_terminated_length": 705.38671875, | |
| "completions/min_length": 257.0, | |
| "completions/min_terminated_length": 257.0, | |
| "epoch": 0.14834337349397592, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3392493724822998, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0361, | |
| "num_tokens": 66416936.0, | |
| "reward": 23.629547119140625, | |
| "reward_std": 3.766573905944824, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.629547119140625, | |
| "rewards/skywork_reward/std": 6.338648796081543, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1299.0, | |
| "completions/max_terminated_length": 1299.0, | |
| "completions/mean_length": 635.73828125, | |
| "completions/mean_terminated_length": 635.73828125, | |
| "completions/min_length": 269.0, | |
| "completions/min_terminated_length": 269.0, | |
| "epoch": 0.14909638554216867, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.37394610047340393, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0242, | |
| "num_tokens": 66789730.0, | |
| "reward": 23.806228637695312, | |
| "reward_std": 3.5995078086853027, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.806228637695312, | |
| "rewards/skywork_reward/std": 7.185088157653809, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1284.0, | |
| "completions/max_terminated_length": 1284.0, | |
| "completions/mean_length": 688.634765625, | |
| "completions/mean_terminated_length": 688.634765625, | |
| "completions/min_length": 312.0, | |
| "completions/min_terminated_length": 312.0, | |
| "epoch": 0.14984939759036145, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.31833726167678833, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0197, | |
| "num_tokens": 67186647.0, | |
| "reward": 23.605316162109375, | |
| "reward_std": 3.3630738258361816, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 23.605316162109375, | |
| "rewards/skywork_reward/std": 8.333732604980469, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1263.0, | |
| "completions/max_terminated_length": 1263.0, | |
| "completions/mean_length": 617.896484375, | |
| "completions/mean_terminated_length": 617.896484375, | |
| "completions/min_length": 323.0, | |
| "completions/min_terminated_length": 323.0, | |
| "epoch": 0.15060240963855423, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3671130836009979, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0301, | |
| "num_tokens": 67548866.0, | |
| "reward": 21.822662353515625, | |
| "reward_std": 3.521603584289551, | |
| "rewards/accuracy_reward/mean": 0.0, | |
| "rewards/accuracy_reward/std": 0.0, | |
| "rewards/skywork_reward/mean": 21.822662353515625, | |
| "rewards/skywork_reward/std": 6.942006587982178, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1328, | |
| "num_input_tokens_seen": 67548866, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |