Instructions to use IgnoraZ/llama3_synthquestions_dpo_100k with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use IgnoraZ/llama3_synthquestions_dpo_100k with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="IgnoraZ/llama3_synthquestions_dpo_100k")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("IgnoraZ/llama3_synthquestions_dpo_100k")
model = AutoModelForCausalLM.from_pretrained("IgnoraZ/llama3_synthquestions_dpo_100k")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use IgnoraZ/llama3_synthquestions_dpo_100k with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "IgnoraZ/llama3_synthquestions_dpo_100k"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "IgnoraZ/llama3_synthquestions_dpo_100k",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/IgnoraZ/llama3_synthquestions_dpo_100k
- SGLang
How to use IgnoraZ/llama3_synthquestions_dpo_100k with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "IgnoraZ/llama3_synthquestions_dpo_100k" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "IgnoraZ/llama3_synthquestions_dpo_100k",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "IgnoraZ/llama3_synthquestions_dpo_100k" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "IgnoraZ/llama3_synthquestions_dpo_100k",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use IgnoraZ/llama3_synthquestions_dpo_100k with Docker Model Runner:
docker model run hf.co/IgnoraZ/llama3_synthquestions_dpo_100k
| { | |
| "best_metric": 0.4712187647819519, | |
| "best_model_checkpoint": "/mnt/yscfs/zhuchiwei/realquestions/ckpt/250212_realquestions_dpo/checkpoint-700", | |
| "epoch": 0.99968, | |
| "eval_steps": 100, | |
| "global_step": 781, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00128, | |
| "grad_norm": 7.436504551716031, | |
| "learning_rate": 8.860759493670886e-09, | |
| "logits/chosen": -1.0859375, | |
| "logits/rejected": -1.10498046875, | |
| "logps/chosen": -336.5, | |
| "logps/rejected": -339.5, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00256, | |
| "grad_norm": 7.250237903929739, | |
| "learning_rate": 1.772151898734177e-08, | |
| "logits/chosen": -1.134765625, | |
| "logits/rejected": -1.11767578125, | |
| "logps/chosen": -329.75, | |
| "logps/rejected": -317.75, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00384, | |
| "grad_norm": 7.662424410669392, | |
| "learning_rate": 2.658227848101266e-08, | |
| "logits/chosen": -1.1162109375, | |
| "logits/rejected": -1.1435546875, | |
| "logps/chosen": -334.5, | |
| "logps/rejected": -305.75, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.0010561943054199219, | |
| "rewards/margins": 0.0004811286926269531, | |
| "rewards/rejected": -0.0015385150909423828, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00512, | |
| "grad_norm": 7.278826365213056, | |
| "learning_rate": 3.544303797468354e-08, | |
| "logits/chosen": -1.154296875, | |
| "logits/rejected": -1.173828125, | |
| "logps/chosen": -312.5, | |
| "logps/rejected": -320.625, | |
| "loss": 0.692, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": 0.00021409988403320312, | |
| "rewards/margins": 5.698204040527344e-05, | |
| "rewards/rejected": 0.000156402587890625, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0064, | |
| "grad_norm": 7.667975226995807, | |
| "learning_rate": 4.430379746835443e-08, | |
| "logits/chosen": -1.1318359375, | |
| "logits/rejected": -1.1826171875, | |
| "logps/chosen": -335.0, | |
| "logps/rejected": -351.875, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -3.075599670410156e-05, | |
| "rewards/margins": 0.0016710758209228516, | |
| "rewards/rejected": -0.0016994476318359375, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.00768, | |
| "grad_norm": 7.34407022526262, | |
| "learning_rate": 5.316455696202532e-08, | |
| "logits/chosen": -1.0146484375, | |
| "logits/rejected": -1.1123046875, | |
| "logps/chosen": -330.75, | |
| "logps/rejected": -351.5, | |
| "loss": 0.6926, | |
| "rewards/accuracies": 0.3046875, | |
| "rewards/chosen": 0.0008311271667480469, | |
| "rewards/margins": -0.0005159378051757812, | |
| "rewards/rejected": 0.0013489723205566406, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.00896, | |
| "grad_norm": 6.79379213946981, | |
| "learning_rate": 6.20253164556962e-08, | |
| "logits/chosen": -1.044921875, | |
| "logits/rejected": -1.10986328125, | |
| "logps/chosen": -285.875, | |
| "logps/rejected": -287.0, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.3046875, | |
| "rewards/chosen": -0.00012946128845214844, | |
| "rewards/margins": -0.00041294097900390625, | |
| "rewards/rejected": 0.0002841949462890625, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.01024, | |
| "grad_norm": 7.527185660748829, | |
| "learning_rate": 7.088607594936708e-08, | |
| "logits/chosen": -1.12548828125, | |
| "logits/rejected": -1.17578125, | |
| "logps/chosen": -337.25, | |
| "logps/rejected": -321.0, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.296875, | |
| "rewards/chosen": 0.0013508796691894531, | |
| "rewards/margins": 0.0003237724304199219, | |
| "rewards/rejected": 0.001026153564453125, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01152, | |
| "grad_norm": 7.076196519468638, | |
| "learning_rate": 7.974683544303797e-08, | |
| "logits/chosen": -1.1572265625, | |
| "logits/rejected": -1.1826171875, | |
| "logps/chosen": -297.0, | |
| "logps/rejected": -312.125, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.234375, | |
| "rewards/chosen": 0.0007529258728027344, | |
| "rewards/margins": -0.0009369850158691406, | |
| "rewards/rejected": 0.0016903877258300781, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0128, | |
| "grad_norm": 7.318991452232657, | |
| "learning_rate": 8.860759493670886e-08, | |
| "logits/chosen": -1.10400390625, | |
| "logits/rejected": -1.1416015625, | |
| "logps/chosen": -318.75, | |
| "logps/rejected": -304.125, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": 0.0015916824340820312, | |
| "rewards/margins": 0.0015668869018554688, | |
| "rewards/rejected": 2.9325485229492188e-05, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01408, | |
| "grad_norm": 7.525848702124104, | |
| "learning_rate": 9.746835443037974e-08, | |
| "logits/chosen": -1.1044921875, | |
| "logits/rejected": -1.09130859375, | |
| "logps/chosen": -341.25, | |
| "logps/rejected": -323.5, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.2578125, | |
| "rewards/chosen": -0.0008592605590820312, | |
| "rewards/margins": -0.0011625289916992188, | |
| "rewards/rejected": 0.00030422210693359375, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.01536, | |
| "grad_norm": 7.187235726257762, | |
| "learning_rate": 1.0632911392405063e-07, | |
| "logits/chosen": -1.09619140625, | |
| "logits/rejected": -1.15283203125, | |
| "logps/chosen": -327.25, | |
| "logps/rejected": -326.25, | |
| "loss": 0.6926, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.0012707710266113281, | |
| "rewards/margins": -0.00025081634521484375, | |
| "rewards/rejected": -0.001018524169921875, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.01664, | |
| "grad_norm": 7.376759243506061, | |
| "learning_rate": 1.151898734177215e-07, | |
| "logits/chosen": -1.130859375, | |
| "logits/rejected": -1.1220703125, | |
| "logps/chosen": -317.75, | |
| "logps/rejected": -320.0, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.3203125, | |
| "rewards/chosen": 5.7220458984375e-05, | |
| "rewards/margins": 0.0014476776123046875, | |
| "rewards/rejected": -0.0013861656188964844, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.01792, | |
| "grad_norm": 7.321841205322695, | |
| "learning_rate": 1.240506329113924e-07, | |
| "logits/chosen": -1.11669921875, | |
| "logits/rejected": -1.146484375, | |
| "logps/chosen": -298.5, | |
| "logps/rejected": -291.875, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.3203125, | |
| "rewards/chosen": 0.0023250579833984375, | |
| "rewards/margins": 0.00077056884765625, | |
| "rewards/rejected": 0.0015516281127929688, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0192, | |
| "grad_norm": 7.513669747884291, | |
| "learning_rate": 1.329113924050633e-07, | |
| "logits/chosen": -1.02978515625, | |
| "logits/rejected": -1.06396484375, | |
| "logps/chosen": -348.5, | |
| "logps/rejected": -348.0, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.2890625, | |
| "rewards/chosen": 0.00205230712890625, | |
| "rewards/margins": 0.001857757568359375, | |
| "rewards/rejected": 0.00019502639770507812, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02048, | |
| "grad_norm": 7.56834980768175, | |
| "learning_rate": 1.4177215189873417e-07, | |
| "logits/chosen": -1.10009765625, | |
| "logits/rejected": -1.123046875, | |
| "logps/chosen": -331.125, | |
| "logps/rejected": -330.25, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": 0.0020537376403808594, | |
| "rewards/margins": 0.0016803741455078125, | |
| "rewards/rejected": 0.0003743171691894531, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02176, | |
| "grad_norm": 7.361758673679262, | |
| "learning_rate": 1.5063291139240505e-07, | |
| "logits/chosen": -1.201171875, | |
| "logits/rejected": -1.1767578125, | |
| "logps/chosen": -337.375, | |
| "logps/rejected": -321.5, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.3203125, | |
| "rewards/chosen": -0.001008749008178711, | |
| "rewards/margins": -0.001155853271484375, | |
| "rewards/rejected": 0.00014638900756835938, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.02304, | |
| "grad_norm": 7.632190356013242, | |
| "learning_rate": 1.5949367088607593e-07, | |
| "logits/chosen": -1.1279296875, | |
| "logits/rejected": -1.18115234375, | |
| "logps/chosen": -324.25, | |
| "logps/rejected": -323.75, | |
| "loss": 0.6913, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": 0.002650022506713867, | |
| "rewards/margins": 0.0017561912536621094, | |
| "rewards/rejected": 0.0008993148803710938, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.02432, | |
| "grad_norm": 7.567509047244517, | |
| "learning_rate": 1.6835443037974684e-07, | |
| "logits/chosen": -1.2041015625, | |
| "logits/rejected": -1.126953125, | |
| "logps/chosen": -334.125, | |
| "logps/rejected": -280.0, | |
| "loss": 0.6913, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": 0.0007448196411132812, | |
| "rewards/margins": 0.00107574462890625, | |
| "rewards/rejected": -0.00033283233642578125, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0256, | |
| "grad_norm": 7.324051502003322, | |
| "learning_rate": 1.7721518987341772e-07, | |
| "logits/chosen": -1.09814453125, | |
| "logits/rejected": -1.10302734375, | |
| "logps/chosen": -304.5, | |
| "logps/rejected": -293.875, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.234375, | |
| "rewards/chosen": -0.0005500316619873047, | |
| "rewards/margins": -0.0008497238159179688, | |
| "rewards/rejected": 0.00030422210693359375, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02688, | |
| "grad_norm": 7.1047505475300525, | |
| "learning_rate": 1.8607594936708857e-07, | |
| "logits/chosen": -1.06591796875, | |
| "logits/rejected": -1.103515625, | |
| "logps/chosen": -343.75, | |
| "logps/rejected": -317.625, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": 0.0008325576782226562, | |
| "rewards/margins": -5.53131103515625e-05, | |
| "rewards/rejected": 0.0008883476257324219, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.02816, | |
| "grad_norm": 6.910170627811144, | |
| "learning_rate": 1.9493670886075948e-07, | |
| "logits/chosen": -1.08251953125, | |
| "logits/rejected": -1.1064453125, | |
| "logps/chosen": -296.5, | |
| "logps/rejected": -283.375, | |
| "loss": 0.6908, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": 0.0032749176025390625, | |
| "rewards/margins": 0.003100872039794922, | |
| "rewards/rejected": 0.00017690658569335938, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.02944, | |
| "grad_norm": 7.159911065415018, | |
| "learning_rate": 2.0379746835443036e-07, | |
| "logits/chosen": -1.1181640625, | |
| "logits/rejected": -1.17578125, | |
| "logps/chosen": -322.5, | |
| "logps/rejected": -323.5, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": 0.0003407001495361328, | |
| "rewards/margins": 0.0001850128173828125, | |
| "rewards/rejected": 0.0001583099365234375, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.03072, | |
| "grad_norm": 7.159118745120893, | |
| "learning_rate": 2.1265822784810127e-07, | |
| "logits/chosen": -1.1064453125, | |
| "logits/rejected": -1.12890625, | |
| "logps/chosen": -317.375, | |
| "logps/rejected": -315.75, | |
| "loss": 0.6919, | |
| "rewards/accuracies": 0.3203125, | |
| "rewards/chosen": 0.0022192001342773438, | |
| "rewards/margins": 0.0004982948303222656, | |
| "rewards/rejected": 0.0017242431640625, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 7.381100080259422, | |
| "learning_rate": 2.2151898734177215e-07, | |
| "logits/chosen": -1.0849609375, | |
| "logits/rejected": -1.11376953125, | |
| "logps/chosen": -320.625, | |
| "logps/rejected": -313.5, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.3515625, | |
| "rewards/chosen": 0.005059480667114258, | |
| "rewards/margins": -0.00042819976806640625, | |
| "rewards/rejected": 0.005497932434082031, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03328, | |
| "grad_norm": 7.218282400256934, | |
| "learning_rate": 2.30379746835443e-07, | |
| "logits/chosen": -1.166015625, | |
| "logits/rejected": -1.185546875, | |
| "logps/chosen": -326.875, | |
| "logps/rejected": -331.875, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": 0.004558563232421875, | |
| "rewards/margins": 0.0010230541229248047, | |
| "rewards/rejected": 0.0035305023193359375, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03456, | |
| "grad_norm": 7.239082966066987, | |
| "learning_rate": 2.392405063291139e-07, | |
| "logits/chosen": -1.1005859375, | |
| "logits/rejected": -1.15283203125, | |
| "logps/chosen": -342.75, | |
| "logps/rejected": -328.875, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": 0.004992961883544922, | |
| "rewards/margins": -2.574920654296875e-05, | |
| "rewards/rejected": 0.0050220489501953125, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.03584, | |
| "grad_norm": 6.948014033726085, | |
| "learning_rate": 2.481012658227848e-07, | |
| "logits/chosen": -1.05908203125, | |
| "logits/rejected": -1.0693359375, | |
| "logps/chosen": -321.625, | |
| "logps/rejected": -285.125, | |
| "loss": 0.6935, | |
| "rewards/accuracies": 0.296875, | |
| "rewards/chosen": 0.0019271373748779297, | |
| "rewards/margins": -0.002063751220703125, | |
| "rewards/rejected": 0.003989458084106445, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03712, | |
| "grad_norm": 6.869465257961058, | |
| "learning_rate": 2.5696202531645567e-07, | |
| "logits/chosen": -1.162109375, | |
| "logits/rejected": -1.15380859375, | |
| "logps/chosen": -307.5, | |
| "logps/rejected": -294.625, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": 0.00728607177734375, | |
| "rewards/margins": 5.91278076171875e-05, | |
| "rewards/rejected": 0.00720977783203125, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0384, | |
| "grad_norm": 7.337023896045187, | |
| "learning_rate": 2.658227848101266e-07, | |
| "logits/chosen": -1.017578125, | |
| "logits/rejected": -1.07275390625, | |
| "logps/chosen": -327.25, | |
| "logps/rejected": -343.0, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": 0.0086822509765625, | |
| "rewards/margins": 5.5789947509765625e-05, | |
| "rewards/rejected": 0.008625030517578125, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03968, | |
| "grad_norm": 7.347791617859354, | |
| "learning_rate": 2.7468354430379743e-07, | |
| "logits/chosen": -1.18359375, | |
| "logits/rejected": -1.2197265625, | |
| "logps/chosen": -311.5, | |
| "logps/rejected": -307.0, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": 0.01105499267578125, | |
| "rewards/margins": 0.0011830329895019531, | |
| "rewards/rejected": 0.009868621826171875, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.04096, | |
| "grad_norm": 7.498901433929662, | |
| "learning_rate": 2.8354430379746834e-07, | |
| "logits/chosen": -1.14111328125, | |
| "logits/rejected": -1.11865234375, | |
| "logps/chosen": -334.5, | |
| "logps/rejected": -310.75, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": 0.009601593017578125, | |
| "rewards/margins": 0.0008537769317626953, | |
| "rewards/rejected": 0.008741378784179688, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04224, | |
| "grad_norm": 7.169131545571104, | |
| "learning_rate": 2.9240506329113925e-07, | |
| "logits/chosen": -1.10888671875, | |
| "logits/rejected": -1.11962890625, | |
| "logps/chosen": -329.25, | |
| "logps/rejected": -319.375, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.4140625, | |
| "rewards/chosen": 0.013051986694335938, | |
| "rewards/margins": 0.0033140182495117188, | |
| "rewards/rejected": 0.00975799560546875, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.04352, | |
| "grad_norm": 7.268573668937982, | |
| "learning_rate": 3.012658227848101e-07, | |
| "logits/chosen": -1.12109375, | |
| "logits/rejected": -1.13037109375, | |
| "logps/chosen": -322.25, | |
| "logps/rejected": -304.0, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": 0.011199951171875, | |
| "rewards/margins": 0.0003910064697265625, | |
| "rewards/rejected": 0.0108184814453125, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0448, | |
| "grad_norm": 7.598910436962618, | |
| "learning_rate": 3.1012658227848096e-07, | |
| "logits/chosen": -1.06591796875, | |
| "logits/rejected": -1.1396484375, | |
| "logps/chosen": -311.375, | |
| "logps/rejected": -331.5, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": 0.01214599609375, | |
| "rewards/margins": -0.0003960132598876953, | |
| "rewards/rejected": 0.012542724609375, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04608, | |
| "grad_norm": 7.108926141259724, | |
| "learning_rate": 3.1898734177215186e-07, | |
| "logits/chosen": -1.10205078125, | |
| "logits/rejected": -1.1240234375, | |
| "logps/chosen": -324.25, | |
| "logps/rejected": -304.125, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.3671875, | |
| "rewards/chosen": 0.0137176513671875, | |
| "rewards/margins": 0.001232147216796875, | |
| "rewards/rejected": 0.012485504150390625, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.04736, | |
| "grad_norm": 7.267204751248262, | |
| "learning_rate": 3.2784810126582277e-07, | |
| "logits/chosen": -1.04638671875, | |
| "logits/rejected": -1.07470703125, | |
| "logps/chosen": -304.0, | |
| "logps/rejected": -331.25, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": 0.013946533203125, | |
| "rewards/margins": 0.00335693359375, | |
| "rewards/rejected": 0.010589599609375, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.04864, | |
| "grad_norm": 7.22576003364743, | |
| "learning_rate": 3.367088607594937e-07, | |
| "logits/chosen": -1.1435546875, | |
| "logits/rejected": -1.1552734375, | |
| "logps/chosen": -338.0, | |
| "logps/rejected": -320.125, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": 0.0144500732421875, | |
| "rewards/margins": 0.000946044921875, | |
| "rewards/rejected": 0.01351165771484375, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04992, | |
| "grad_norm": 6.91155869405154, | |
| "learning_rate": 3.4556962025316453e-07, | |
| "logits/chosen": -1.02978515625, | |
| "logits/rejected": -1.1005859375, | |
| "logps/chosen": -301.125, | |
| "logps/rejected": -309.0, | |
| "loss": 0.6938, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": 0.013141632080078125, | |
| "rewards/margins": -0.00186920166015625, | |
| "rewards/rejected": 0.0149993896484375, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0512, | |
| "grad_norm": 7.5651212389735845, | |
| "learning_rate": 3.5443037974683544e-07, | |
| "logits/chosen": -1.11279296875, | |
| "logits/rejected": -1.18359375, | |
| "logps/chosen": -301.375, | |
| "logps/rejected": -328.0, | |
| "loss": 0.693, | |
| "rewards/accuracies": 0.359375, | |
| "rewards/chosen": 0.013530731201171875, | |
| "rewards/margins": -0.0004057884216308594, | |
| "rewards/rejected": 0.0139312744140625, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05248, | |
| "grad_norm": 7.055346161775772, | |
| "learning_rate": 3.632911392405063e-07, | |
| "logits/chosen": -1.1689453125, | |
| "logits/rejected": -1.19482421875, | |
| "logps/chosen": -325.75, | |
| "logps/rejected": -307.5, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": 0.017852783203125, | |
| "rewards/margins": 0.002711772918701172, | |
| "rewards/rejected": 0.01515960693359375, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.05376, | |
| "grad_norm": 7.034415420766986, | |
| "learning_rate": 3.7215189873417715e-07, | |
| "logits/chosen": -1.14501953125, | |
| "logits/rejected": -1.1533203125, | |
| "logps/chosen": -342.0, | |
| "logps/rejected": -316.25, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": 0.019439697265625, | |
| "rewards/margins": 0.0026378631591796875, | |
| "rewards/rejected": 0.01682281494140625, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.05504, | |
| "grad_norm": 7.974604803897025, | |
| "learning_rate": 3.810126582278481e-07, | |
| "logits/chosen": -1.1513671875, | |
| "logits/rejected": -1.1669921875, | |
| "logps/chosen": -364.25, | |
| "logps/rejected": -370.125, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": 0.01983642578125, | |
| "rewards/margins": 0.0025014877319335938, | |
| "rewards/rejected": 0.01732635498046875, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.05632, | |
| "grad_norm": 7.177077737022184, | |
| "learning_rate": 3.8987341772151896e-07, | |
| "logits/chosen": -1.0830078125, | |
| "logits/rejected": -1.11474609375, | |
| "logps/chosen": -337.0, | |
| "logps/rejected": -321.375, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.4296875, | |
| "rewards/chosen": 0.0222625732421875, | |
| "rewards/margins": 0.0036296844482421875, | |
| "rewards/rejected": 0.01862335205078125, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0576, | |
| "grad_norm": 7.092550989605114, | |
| "learning_rate": 3.9873417721518987e-07, | |
| "logits/chosen": -1.208984375, | |
| "logits/rejected": -1.1796875, | |
| "logps/chosen": -336.75, | |
| "logps/rejected": -338.5, | |
| "loss": 0.691, | |
| "rewards/accuracies": 0.4453125, | |
| "rewards/chosen": 0.0211334228515625, | |
| "rewards/margins": 0.0034885406494140625, | |
| "rewards/rejected": 0.01763916015625, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05888, | |
| "grad_norm": 7.466264419127936, | |
| "learning_rate": 4.075949367088607e-07, | |
| "logits/chosen": -1.0966796875, | |
| "logits/rejected": -1.126953125, | |
| "logps/chosen": -321.0, | |
| "logps/rejected": -334.75, | |
| "loss": 0.6891, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": 0.0274810791015625, | |
| "rewards/margins": 0.0086822509765625, | |
| "rewards/rejected": 0.0187835693359375, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.06016, | |
| "grad_norm": 6.985552122485807, | |
| "learning_rate": 4.164556962025316e-07, | |
| "logits/chosen": -1.12109375, | |
| "logits/rejected": -1.1279296875, | |
| "logps/chosen": -303.0, | |
| "logps/rejected": -297.875, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": 0.0264129638671875, | |
| "rewards/margins": 0.003119945526123047, | |
| "rewards/rejected": 0.02330780029296875, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.06144, | |
| "grad_norm": 7.176358655196464, | |
| "learning_rate": 4.2531645569620254e-07, | |
| "logits/chosen": -1.1015625, | |
| "logits/rejected": -1.11083984375, | |
| "logps/chosen": -340.5, | |
| "logps/rejected": -318.75, | |
| "loss": 0.6901, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.028717041015625, | |
| "rewards/margins": 0.0058269500732421875, | |
| "rewards/rejected": 0.0229034423828125, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06272, | |
| "grad_norm": 7.100421828155237, | |
| "learning_rate": 4.341772151898734e-07, | |
| "logits/chosen": -1.2080078125, | |
| "logits/rejected": -1.2109375, | |
| "logps/chosen": -339.75, | |
| "logps/rejected": -332.25, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.3984375, | |
| "rewards/chosen": 0.0296630859375, | |
| "rewards/margins": 0.00295257568359375, | |
| "rewards/rejected": 0.0267333984375, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 7.057468496585091, | |
| "learning_rate": 4.430379746835443e-07, | |
| "logits/chosen": -1.115234375, | |
| "logits/rejected": -1.14599609375, | |
| "logps/chosen": -306.75, | |
| "logps/rejected": -277.0, | |
| "loss": 0.6878, | |
| "rewards/accuracies": 0.5390625, | |
| "rewards/chosen": 0.03302001953125, | |
| "rewards/margins": 0.00989532470703125, | |
| "rewards/rejected": 0.02313232421875, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06528, | |
| "grad_norm": 7.03326897520388, | |
| "learning_rate": 4.5189873417721515e-07, | |
| "logits/chosen": -1.1220703125, | |
| "logits/rejected": -1.1650390625, | |
| "logps/chosen": -292.5, | |
| "logps/rejected": -301.625, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.4453125, | |
| "rewards/chosen": 0.0316925048828125, | |
| "rewards/margins": 0.0028066635131835938, | |
| "rewards/rejected": 0.0289306640625, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.06656, | |
| "grad_norm": 6.957761014625216, | |
| "learning_rate": 4.60759493670886e-07, | |
| "logits/chosen": -1.1015625, | |
| "logits/rejected": -1.1357421875, | |
| "logps/chosen": -309.75, | |
| "logps/rejected": -317.75, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.4296875, | |
| "rewards/chosen": 0.034149169921875, | |
| "rewards/margins": 0.004019737243652344, | |
| "rewards/rejected": 0.0301361083984375, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.06784, | |
| "grad_norm": 6.866772048798592, | |
| "learning_rate": 4.6962025316455697e-07, | |
| "logits/chosen": -1.2236328125, | |
| "logits/rejected": -1.23828125, | |
| "logps/chosen": -313.0, | |
| "logps/rejected": -290.625, | |
| "loss": 0.6903, | |
| "rewards/accuracies": 0.4609375, | |
| "rewards/chosen": 0.036773681640625, | |
| "rewards/margins": 0.0050182342529296875, | |
| "rewards/rejected": 0.0317535400390625, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.06912, | |
| "grad_norm": 34.49051291265523, | |
| "learning_rate": 4.784810126582278e-07, | |
| "logits/chosen": -1.1630859375, | |
| "logits/rejected": -1.1513671875, | |
| "logps/chosen": -297.375, | |
| "logps/rejected": -443.875, | |
| "loss": 0.6864, | |
| "rewards/accuracies": 0.4765625, | |
| "rewards/chosen": 0.0362701416015625, | |
| "rewards/margins": 0.02279949188232422, | |
| "rewards/rejected": 0.013580322265625, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0704, | |
| "grad_norm": 7.450083821669119, | |
| "learning_rate": 4.873417721518987e-07, | |
| "logits/chosen": -1.126953125, | |
| "logits/rejected": -1.216796875, | |
| "logps/chosen": -303.125, | |
| "logps/rejected": -338.25, | |
| "loss": 0.6942, | |
| "rewards/accuracies": 0.3359375, | |
| "rewards/chosen": 0.03314208984375, | |
| "rewards/margins": -0.002353191375732422, | |
| "rewards/rejected": 0.0355072021484375, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.07168, | |
| "grad_norm": 7.328024111375669, | |
| "learning_rate": 4.962025316455696e-07, | |
| "logits/chosen": -1.1318359375, | |
| "logits/rejected": -1.12451171875, | |
| "logps/chosen": -327.75, | |
| "logps/rejected": -300.75, | |
| "loss": 0.6884, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": 0.0408172607421875, | |
| "rewards/margins": 0.008729934692382812, | |
| "rewards/rejected": 0.0321197509765625, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07296, | |
| "grad_norm": 6.641731690681104, | |
| "learning_rate": 5.050632911392404e-07, | |
| "logits/chosen": -1.19482421875, | |
| "logits/rejected": -1.208984375, | |
| "logps/chosen": -274.0, | |
| "logps/rejected": -292.5, | |
| "loss": 0.6896, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0372772216796875, | |
| "rewards/margins": 0.0062408447265625, | |
| "rewards/rejected": 0.0310516357421875, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.07424, | |
| "grad_norm": 7.328502114098263, | |
| "learning_rate": 5.139240506329113e-07, | |
| "logits/chosen": -1.2109375, | |
| "logits/rejected": -1.220703125, | |
| "logps/chosen": -337.25, | |
| "logps/rejected": -324.25, | |
| "loss": 0.6887, | |
| "rewards/accuracies": 0.4453125, | |
| "rewards/chosen": 0.0395965576171875, | |
| "rewards/margins": 0.008788108825683594, | |
| "rewards/rejected": 0.03082275390625, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07552, | |
| "grad_norm": 7.129804240589812, | |
| "learning_rate": 5.227848101265822e-07, | |
| "logits/chosen": -1.1884765625, | |
| "logits/rejected": -1.2099609375, | |
| "logps/chosen": -304.5, | |
| "logps/rejected": -306.75, | |
| "loss": 0.6885, | |
| "rewards/accuracies": 0.5078125, | |
| "rewards/chosen": 0.0443572998046875, | |
| "rewards/margins": 0.008890151977539062, | |
| "rewards/rejected": 0.035400390625, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.0768, | |
| "grad_norm": 7.1937924938108875, | |
| "learning_rate": 5.316455696202532e-07, | |
| "logits/chosen": -1.07861328125, | |
| "logits/rejected": -1.03759765625, | |
| "logps/chosen": -322.25, | |
| "logps/rejected": -299.375, | |
| "loss": 0.6874, | |
| "rewards/accuracies": 0.5390625, | |
| "rewards/chosen": 0.046478271484375, | |
| "rewards/margins": 0.011920928955078125, | |
| "rewards/rejected": 0.0345611572265625, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07808, | |
| "grad_norm": 7.129369707890816, | |
| "learning_rate": 5.405063291139241e-07, | |
| "logits/chosen": -1.171875, | |
| "logits/rejected": -1.15380859375, | |
| "logps/chosen": -317.25, | |
| "logps/rejected": -275.125, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": 0.05120849609375, | |
| "rewards/margins": 0.01607513427734375, | |
| "rewards/rejected": 0.0351715087890625, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.07936, | |
| "grad_norm": 7.302643657053502, | |
| "learning_rate": 5.493670886075949e-07, | |
| "logits/chosen": -1.189453125, | |
| "logits/rejected": -1.2587890625, | |
| "logps/chosen": -330.75, | |
| "logps/rejected": -345.5, | |
| "loss": 0.691, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": 0.0424652099609375, | |
| "rewards/margins": 0.003490447998046875, | |
| "rewards/rejected": 0.03900146484375, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.08064, | |
| "grad_norm": 6.84968399915786, | |
| "learning_rate": 5.582278481012658e-07, | |
| "logits/chosen": -1.087890625, | |
| "logits/rejected": -1.119140625, | |
| "logps/chosen": -326.125, | |
| "logps/rejected": -308.625, | |
| "loss": 0.6886, | |
| "rewards/accuracies": 0.4921875, | |
| "rewards/chosen": 0.0484619140625, | |
| "rewards/margins": 0.009546279907226562, | |
| "rewards/rejected": 0.038909912109375, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.08192, | |
| "grad_norm": 7.274670055824019, | |
| "learning_rate": 5.670886075949367e-07, | |
| "logits/chosen": -1.1708984375, | |
| "logits/rejected": -1.19140625, | |
| "logps/chosen": -324.25, | |
| "logps/rejected": -328.875, | |
| "loss": 0.6859, | |
| "rewards/accuracies": 0.5546875, | |
| "rewards/chosen": 0.054931640625, | |
| "rewards/margins": 0.013892173767089844, | |
| "rewards/rejected": 0.04095458984375, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0832, | |
| "grad_norm": 7.043191769175182, | |
| "learning_rate": 5.759493670886076e-07, | |
| "logits/chosen": -1.1728515625, | |
| "logits/rejected": -1.193359375, | |
| "logps/chosen": -321.75, | |
| "logps/rejected": -315.0, | |
| "loss": 0.6874, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.050048828125, | |
| "rewards/margins": 0.011362075805664062, | |
| "rewards/rejected": 0.0386810302734375, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.08448, | |
| "grad_norm": 7.1747985320738445, | |
| "learning_rate": 5.848101265822785e-07, | |
| "logits/chosen": -1.19921875, | |
| "logits/rejected": -1.224609375, | |
| "logps/chosen": -331.5, | |
| "logps/rejected": -333.5, | |
| "loss": 0.6893, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 0.05633544921875, | |
| "rewards/margins": 0.007636070251464844, | |
| "rewards/rejected": 0.04864501953125, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.08576, | |
| "grad_norm": 6.706114883500065, | |
| "learning_rate": 5.936708860759493e-07, | |
| "logits/chosen": -1.1787109375, | |
| "logits/rejected": -1.17626953125, | |
| "logps/chosen": -327.0, | |
| "logps/rejected": -296.875, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.05401611328125, | |
| "rewards/margins": 0.016617774963378906, | |
| "rewards/rejected": 0.0373687744140625, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.08704, | |
| "grad_norm": 6.815414755393516, | |
| "learning_rate": 6.025316455696202e-07, | |
| "logits/chosen": -1.1162109375, | |
| "logits/rejected": -1.1396484375, | |
| "logps/chosen": -315.75, | |
| "logps/rejected": -303.5, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.5234375, | |
| "rewards/chosen": 0.0550537109375, | |
| "rewards/margins": 0.00931549072265625, | |
| "rewards/rejected": 0.0457763671875, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08832, | |
| "grad_norm": 7.17640719707697, | |
| "learning_rate": 6.113924050632911e-07, | |
| "logits/chosen": -1.169921875, | |
| "logits/rejected": -1.22314453125, | |
| "logps/chosen": -345.5, | |
| "logps/rejected": -337.75, | |
| "loss": 0.6857, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.05963134765625, | |
| "rewards/margins": 0.015005111694335938, | |
| "rewards/rejected": 0.0446624755859375, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.0896, | |
| "grad_norm": 7.148882260506765, | |
| "learning_rate": 6.202531645569619e-07, | |
| "logits/chosen": -1.193359375, | |
| "logits/rejected": -1.171875, | |
| "logps/chosen": -324.25, | |
| "logps/rejected": -319.25, | |
| "loss": 0.6857, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": 0.058624267578125, | |
| "rewards/margins": 0.015472412109375, | |
| "rewards/rejected": 0.043121337890625, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.09088, | |
| "grad_norm": 6.965207472135811, | |
| "learning_rate": 6.291139240506329e-07, | |
| "logits/chosen": -1.1845703125, | |
| "logits/rejected": -1.189453125, | |
| "logps/chosen": -320.75, | |
| "logps/rejected": -302.625, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.5390625, | |
| "rewards/chosen": 0.059600830078125, | |
| "rewards/margins": 0.011350154876708984, | |
| "rewards/rejected": 0.0481719970703125, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.09216, | |
| "grad_norm": 6.690590284756765, | |
| "learning_rate": 6.379746835443037e-07, | |
| "logits/chosen": -1.2001953125, | |
| "logits/rejected": -1.1767578125, | |
| "logps/chosen": -311.125, | |
| "logps/rejected": -314.25, | |
| "loss": 0.6867, | |
| "rewards/accuracies": 0.4921875, | |
| "rewards/chosen": 0.058197021484375, | |
| "rewards/margins": 0.012401580810546875, | |
| "rewards/rejected": 0.045806884765625, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09344, | |
| "grad_norm": 6.995206172986862, | |
| "learning_rate": 6.468354430379746e-07, | |
| "logits/chosen": -1.185546875, | |
| "logits/rejected": -1.2392578125, | |
| "logps/chosen": -306.125, | |
| "logps/rejected": -320.25, | |
| "loss": 0.692, | |
| "rewards/accuracies": 0.4609375, | |
| "rewards/chosen": 0.054534912109375, | |
| "rewards/margins": 0.002094268798828125, | |
| "rewards/rejected": 0.052459716796875, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.09472, | |
| "grad_norm": 7.197486018158177, | |
| "learning_rate": 6.556962025316455e-07, | |
| "logits/chosen": -1.189453125, | |
| "logits/rejected": -1.18701171875, | |
| "logps/chosen": -321.75, | |
| "logps/rejected": -315.25, | |
| "loss": 0.6816, | |
| "rewards/accuracies": 0.6328125, | |
| "rewards/chosen": 0.06298828125, | |
| "rewards/margins": 0.02439117431640625, | |
| "rewards/rejected": 0.038543701171875, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 7.062298755275947, | |
| "learning_rate": 6.645569620253163e-07, | |
| "logits/chosen": -1.17529296875, | |
| "logits/rejected": -1.18359375, | |
| "logps/chosen": -337.625, | |
| "logps/rejected": -304.875, | |
| "loss": 0.6865, | |
| "rewards/accuracies": 0.5859375, | |
| "rewards/chosen": 0.068359375, | |
| "rewards/margins": 0.01427459716796875, | |
| "rewards/rejected": 0.0540771484375, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.09728, | |
| "grad_norm": 6.9747053464578155, | |
| "learning_rate": 6.734177215189874e-07, | |
| "logits/chosen": -1.17431640625, | |
| "logits/rejected": -1.197265625, | |
| "logps/chosen": -320.25, | |
| "logps/rejected": -305.0, | |
| "loss": 0.6862, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.06768798828125, | |
| "rewards/margins": 0.014251708984375, | |
| "rewards/rejected": 0.053436279296875, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.09856, | |
| "grad_norm": 7.0914726140322015, | |
| "learning_rate": 6.822784810126582e-07, | |
| "logits/chosen": -1.25, | |
| "logits/rejected": -1.28515625, | |
| "logps/chosen": -302.625, | |
| "logps/rejected": -311.5, | |
| "loss": 0.6864, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.06793212890625, | |
| "rewards/margins": 0.014862060546875, | |
| "rewards/rejected": 0.0531005859375, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.09984, | |
| "grad_norm": 6.924658831045798, | |
| "learning_rate": 6.911392405063291e-07, | |
| "logits/chosen": -1.1904296875, | |
| "logits/rejected": -1.2080078125, | |
| "logps/chosen": -312.375, | |
| "logps/rejected": -312.75, | |
| "loss": 0.6879, | |
| "rewards/accuracies": 0.5234375, | |
| "rewards/chosen": 0.064971923828125, | |
| "rewards/margins": 0.010891914367675781, | |
| "rewards/rejected": 0.054107666015625, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.10112, | |
| "grad_norm": 6.617651432015416, | |
| "learning_rate": 7e-07, | |
| "logits/chosen": -1.220703125, | |
| "logits/rejected": -1.2080078125, | |
| "logps/chosen": -307.5, | |
| "logps/rejected": -288.125, | |
| "loss": 0.6858, | |
| "rewards/accuracies": 0.5703125, | |
| "rewards/chosen": 0.06640625, | |
| "rewards/margins": 0.01545858383178711, | |
| "rewards/rejected": 0.050933837890625, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.1024, | |
| "grad_norm": 7.148212562285202, | |
| "learning_rate": 6.999964952031891e-07, | |
| "logits/chosen": -1.142578125, | |
| "logits/rejected": -1.13818359375, | |
| "logps/chosen": -340.5, | |
| "logps/rejected": -325.375, | |
| "loss": 0.6825, | |
| "rewards/accuracies": 0.6015625, | |
| "rewards/chosen": 0.069793701171875, | |
| "rewards/margins": 0.021467208862304688, | |
| "rewards/rejected": 0.04840087890625, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10368, | |
| "grad_norm": 6.912997090681548, | |
| "learning_rate": 6.999859808829482e-07, | |
| "logits/chosen": -1.212890625, | |
| "logits/rejected": -1.1943359375, | |
| "logps/chosen": -329.125, | |
| "logps/rejected": -302.75, | |
| "loss": 0.6813, | |
| "rewards/accuracies": 0.6171875, | |
| "rewards/chosen": 0.071319580078125, | |
| "rewards/margins": 0.024139404296875, | |
| "rewards/rejected": 0.047149658203125, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.10496, | |
| "grad_norm": 6.642590263041201, | |
| "learning_rate": 6.999684572498524e-07, | |
| "logits/chosen": -1.251953125, | |
| "logits/rejected": -1.2294921875, | |
| "logps/chosen": -306.5, | |
| "logps/rejected": -282.875, | |
| "loss": 0.6837, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.070587158203125, | |
| "rewards/margins": 0.020760536193847656, | |
| "rewards/rejected": 0.0496978759765625, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10624, | |
| "grad_norm": 7.156833321052252, | |
| "learning_rate": 6.99943924654854e-07, | |
| "logits/chosen": -1.20947265625, | |
| "logits/rejected": -1.240234375, | |
| "logps/chosen": -299.5, | |
| "logps/rejected": -319.5, | |
| "loss": 0.6876, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": 0.07061767578125, | |
| "rewards/margins": 0.011600494384765625, | |
| "rewards/rejected": 0.058990478515625, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.10752, | |
| "grad_norm": 7.161233013169767, | |
| "learning_rate": 6.999123835892781e-07, | |
| "logits/chosen": -1.2470703125, | |
| "logits/rejected": -1.2265625, | |
| "logps/chosen": -361.625, | |
| "logps/rejected": -346.875, | |
| "loss": 0.6774, | |
| "rewards/accuracies": 0.6015625, | |
| "rewards/chosen": 0.078857421875, | |
| "rewards/margins": 0.03279876708984375, | |
| "rewards/rejected": 0.04608154296875, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.1088, | |
| "grad_norm": 7.380079541175457, | |
| "learning_rate": 6.998738346848098e-07, | |
| "logits/chosen": -1.169921875, | |
| "logits/rejected": -1.171875, | |
| "logps/chosen": -321.75, | |
| "logps/rejected": -313.0, | |
| "loss": 0.6758, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": 0.076507568359375, | |
| "rewards/margins": 0.0360107421875, | |
| "rewards/rejected": 0.04058837890625, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.11008, | |
| "grad_norm": 6.787264499218527, | |
| "learning_rate": 6.998282787134845e-07, | |
| "logits/chosen": -1.2353515625, | |
| "logits/rejected": -1.2216796875, | |
| "logps/chosen": -307.625, | |
| "logps/rejected": -280.625, | |
| "loss": 0.681, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": 0.07275390625, | |
| "rewards/margins": 0.025938034057617188, | |
| "rewards/rejected": 0.046783447265625, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.11136, | |
| "grad_norm": 7.070691711467475, | |
| "learning_rate": 6.997757165876698e-07, | |
| "logits/chosen": -1.212890625, | |
| "logits/rejected": -1.21484375, | |
| "logps/chosen": -333.5, | |
| "logps/rejected": -326.5, | |
| "loss": 0.681, | |
| "rewards/accuracies": 0.6015625, | |
| "rewards/chosen": 0.075775146484375, | |
| "rewards/margins": 0.024099349975585938, | |
| "rewards/rejected": 0.05169677734375, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.11264, | |
| "grad_norm": 7.281268067802746, | |
| "learning_rate": 6.997161493600493e-07, | |
| "logits/chosen": -1.2333984375, | |
| "logits/rejected": -1.20751953125, | |
| "logps/chosen": -342.375, | |
| "logps/rejected": -297.75, | |
| "loss": 0.6748, | |
| "rewards/accuracies": 0.609375, | |
| "rewards/chosen": 0.0849609375, | |
| "rewards/margins": 0.038787841796875, | |
| "rewards/rejected": 0.0461883544921875, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.11392, | |
| "grad_norm": 6.721540641608089, | |
| "learning_rate": 6.996495782236003e-07, | |
| "logits/chosen": -1.1689453125, | |
| "logits/rejected": -1.1826171875, | |
| "logps/chosen": -284.5, | |
| "logps/rejected": -302.75, | |
| "loss": 0.6877, | |
| "rewards/accuracies": 0.5234375, | |
| "rewards/chosen": 0.063812255859375, | |
| "rewards/margins": 0.011150360107421875, | |
| "rewards/rejected": 0.052581787109375, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.1152, | |
| "grad_norm": 6.658821074174305, | |
| "learning_rate": 6.9957600451157e-07, | |
| "logits/chosen": -1.2216796875, | |
| "logits/rejected": -1.2529296875, | |
| "logps/chosen": -289.0, | |
| "logps/rejected": -299.5, | |
| "loss": 0.6808, | |
| "rewards/accuracies": 0.6171875, | |
| "rewards/chosen": 0.0616455078125, | |
| "rewards/margins": 0.025604248046875, | |
| "rewards/rejected": 0.03614044189453125, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11648, | |
| "grad_norm": 6.960320737670953, | |
| "learning_rate": 6.994954296974495e-07, | |
| "logits/chosen": -1.23388671875, | |
| "logits/rejected": -1.263671875, | |
| "logps/chosen": -302.5, | |
| "logps/rejected": -310.25, | |
| "loss": 0.6793, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.07379150390625, | |
| "rewards/margins": 0.029428482055664062, | |
| "rewards/rejected": 0.044342041015625, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.11776, | |
| "grad_norm": 7.049238366581128, | |
| "learning_rate": 6.994078553949439e-07, | |
| "logits/chosen": -1.2294921875, | |
| "logits/rejected": -1.267578125, | |
| "logps/chosen": -313.625, | |
| "logps/rejected": -294.0, | |
| "loss": 0.6731, | |
| "rewards/accuracies": 0.6328125, | |
| "rewards/chosen": 0.0853271484375, | |
| "rewards/margins": 0.0420074462890625, | |
| "rewards/rejected": 0.04345703125, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.11904, | |
| "grad_norm": 6.649581467509272, | |
| "learning_rate": 6.993132833579392e-07, | |
| "logits/chosen": -1.20703125, | |
| "logits/rejected": -1.2236328125, | |
| "logps/chosen": -287.5, | |
| "logps/rejected": -288.25, | |
| "loss": 0.6779, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.075164794921875, | |
| "rewards/margins": 0.03218841552734375, | |
| "rewards/rejected": 0.04302978515625, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.12032, | |
| "grad_norm": 6.872841249887952, | |
| "learning_rate": 6.992117154804688e-07, | |
| "logits/chosen": -1.1748046875, | |
| "logits/rejected": -1.224609375, | |
| "logps/chosen": -314.25, | |
| "logps/rejected": -305.0, | |
| "loss": 0.6812, | |
| "rewards/accuracies": 0.6015625, | |
| "rewards/chosen": 0.07373046875, | |
| "rewards/margins": 0.026458740234375, | |
| "rewards/rejected": 0.0472412109375, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.1216, | |
| "grad_norm": 7.327496061016414, | |
| "learning_rate": 6.99103153796674e-07, | |
| "logits/chosen": -1.173828125, | |
| "logits/rejected": -1.1796875, | |
| "logps/chosen": -337.25, | |
| "logps/rejected": -300.75, | |
| "loss": 0.6748, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": 0.07318115234375, | |
| "rewards/margins": 0.0394134521484375, | |
| "rewards/rejected": 0.03388214111328125, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.12288, | |
| "grad_norm": 7.333095642704951, | |
| "learning_rate": 6.989876004807644e-07, | |
| "logits/chosen": -1.2060546875, | |
| "logits/rejected": -1.2041015625, | |
| "logps/chosen": -344.75, | |
| "logps/rejected": -315.0, | |
| "loss": 0.6733, | |
| "rewards/accuracies": 0.6015625, | |
| "rewards/chosen": 0.07806396484375, | |
| "rewards/margins": 0.04097175598144531, | |
| "rewards/rejected": 0.0370635986328125, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.12416, | |
| "grad_norm": 7.054266672465839, | |
| "learning_rate": 6.988650578469735e-07, | |
| "logits/chosen": -1.2177734375, | |
| "logits/rejected": -1.2255859375, | |
| "logps/chosen": -326.25, | |
| "logps/rejected": -359.0, | |
| "loss": 0.6661, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.071868896484375, | |
| "rewards/margins": 0.05510711669921875, | |
| "rewards/rejected": 0.016735076904296875, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.12544, | |
| "grad_norm": 7.188205202679432, | |
| "learning_rate": 6.98735528349513e-07, | |
| "logits/chosen": -1.18212890625, | |
| "logits/rejected": -1.2021484375, | |
| "logps/chosen": -273.875, | |
| "logps/rejected": -304.0, | |
| "loss": 0.6746, | |
| "rewards/accuracies": 0.6015625, | |
| "rewards/chosen": 0.059661865234375, | |
| "rewards/margins": 0.03852081298828125, | |
| "rewards/rejected": 0.02120208740234375, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.12672, | |
| "grad_norm": 7.374652320682456, | |
| "learning_rate": 6.985990145825232e-07, | |
| "logits/chosen": -1.13818359375, | |
| "logits/rejected": -1.1669921875, | |
| "logps/chosen": -330.25, | |
| "logps/rejected": -322.0, | |
| "loss": 0.6701, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": 0.074493408203125, | |
| "rewards/margins": 0.04855918884277344, | |
| "rewards/rejected": 0.02597808837890625, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 7.010945640873385, | |
| "learning_rate": 6.984555192800215e-07, | |
| "logits/chosen": -1.244140625, | |
| "logits/rejected": -1.296875, | |
| "logps/chosen": -314.75, | |
| "logps/rejected": -322.5, | |
| "loss": 0.6711, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": 0.076141357421875, | |
| "rewards/margins": 0.0458831787109375, | |
| "rewards/rejected": 0.030277252197265625, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "eval_logits/chosen": -1.169921875, | |
| "eval_logits/rejected": -1.2216796875, | |
| "eval_logps/chosen": -314.125, | |
| "eval_logps/rejected": -299.0625, | |
| "eval_loss": 0.6733124852180481, | |
| "eval_rewards/accuracies": 0.649609386920929, | |
| "eval_rewards/chosen": 0.0625, | |
| "eval_rewards/margins": 0.04170989990234375, | |
| "eval_rewards/rejected": 0.020813941955566406, | |
| "eval_runtime": 27.7049, | |
| "eval_samples_per_second": 18.047, | |
| "eval_steps_per_second": 0.578, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12928, | |
| "grad_norm": 6.562841834429094, | |
| "learning_rate": 6.983050453158471e-07, | |
| "logits/chosen": -1.18994140625, | |
| "logits/rejected": -1.171875, | |
| "logps/chosen": -294.0, | |
| "logps/rejected": -260.25, | |
| "loss": 0.6775, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.058380126953125, | |
| "rewards/margins": 0.0331878662109375, | |
| "rewards/rejected": 0.025234222412109375, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.13056, | |
| "grad_norm": 7.311489748652196, | |
| "learning_rate": 6.981475957036038e-07, | |
| "logits/chosen": -1.18359375, | |
| "logits/rejected": -1.1904296875, | |
| "logps/chosen": -319.0, | |
| "logps/rejected": -322.5, | |
| "loss": 0.6697, | |
| "rewards/accuracies": 0.6171875, | |
| "rewards/chosen": 0.061248779296875, | |
| "rewards/margins": 0.049468994140625, | |
| "rewards/rejected": 0.011905670166015625, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.13184, | |
| "grad_norm": 6.969731388418404, | |
| "learning_rate": 6.979831735965997e-07, | |
| "logits/chosen": -1.19921875, | |
| "logits/rejected": -1.2529296875, | |
| "logps/chosen": -312.5, | |
| "logps/rejected": -329.0, | |
| "loss": 0.6769, | |
| "rewards/accuracies": 0.609375, | |
| "rewards/chosen": 0.057342529296875, | |
| "rewards/margins": 0.03508758544921875, | |
| "rewards/rejected": 0.02228546142578125, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.13312, | |
| "grad_norm": 7.382805496910107, | |
| "learning_rate": 6.978117822877838e-07, | |
| "logits/chosen": -1.1396484375, | |
| "logits/rejected": -1.162109375, | |
| "logps/chosen": -346.0, | |
| "logps/rejected": -328.25, | |
| "loss": 0.6633, | |
| "rewards/accuracies": 0.6796875, | |
| "rewards/chosen": 0.0740966796875, | |
| "rewards/margins": 0.0634307861328125, | |
| "rewards/rejected": 0.010618209838867188, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.1344, | |
| "grad_norm": 7.529482042313781, | |
| "learning_rate": 6.976334252096801e-07, | |
| "logits/chosen": -1.2216796875, | |
| "logits/rejected": -1.26123046875, | |
| "logps/chosen": -304.875, | |
| "logps/rejected": -337.625, | |
| "loss": 0.6731, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": 0.0610198974609375, | |
| "rewards/margins": 0.043567657470703125, | |
| "rewards/rejected": 0.0174560546875, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.13568, | |
| "grad_norm": 7.287196640480614, | |
| "learning_rate": 6.974481059343188e-07, | |
| "logits/chosen": -1.240234375, | |
| "logits/rejected": -1.224609375, | |
| "logps/chosen": -338.25, | |
| "logps/rejected": -301.125, | |
| "loss": 0.6694, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.0517730712890625, | |
| "rewards/margins": 0.0513458251953125, | |
| "rewards/rejected": 0.000377655029296875, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.13696, | |
| "grad_norm": 7.457935260315348, | |
| "learning_rate": 6.972558281731654e-07, | |
| "logits/chosen": -1.18359375, | |
| "logits/rejected": -1.2529296875, | |
| "logps/chosen": -308.125, | |
| "logps/rejected": -343.875, | |
| "loss": 0.6727, | |
| "rewards/accuracies": 0.6015625, | |
| "rewards/chosen": 0.03688812255859375, | |
| "rewards/margins": 0.04468536376953125, | |
| "rewards/rejected": -0.007733345031738281, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.13824, | |
| "grad_norm": 7.110683172273849, | |
| "learning_rate": 6.970565957770455e-07, | |
| "logits/chosen": -1.2783203125, | |
| "logits/rejected": -1.279296875, | |
| "logps/chosen": -325.375, | |
| "logps/rejected": -300.75, | |
| "loss": 0.6628, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": 0.04693603515625, | |
| "rewards/margins": 0.06512451171875, | |
| "rewards/rejected": -0.01806640625, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.13952, | |
| "grad_norm": 7.4393288593299935, | |
| "learning_rate": 6.96850412736068e-07, | |
| "logits/chosen": -1.1689453125, | |
| "logits/rejected": -1.20166015625, | |
| "logps/chosen": -293.75, | |
| "logps/rejected": -323.25, | |
| "loss": 0.6661, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.03907012939453125, | |
| "rewards/margins": 0.056720733642578125, | |
| "rewards/rejected": -0.0177459716796875, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.1408, | |
| "grad_norm": 7.027414643492787, | |
| "learning_rate": 6.96637283179545e-07, | |
| "logits/chosen": -1.1953125, | |
| "logits/rejected": -1.2119140625, | |
| "logps/chosen": -319.25, | |
| "logps/rejected": -312.0, | |
| "loss": 0.6673, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.024829864501953125, | |
| "rewards/margins": 0.054996490478515625, | |
| "rewards/rejected": -0.030157089233398438, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.14208, | |
| "grad_norm": 7.3061024388552065, | |
| "learning_rate": 6.9641721137591e-07, | |
| "logits/chosen": -1.1865234375, | |
| "logits/rejected": -1.1982421875, | |
| "logps/chosen": -347.5, | |
| "logps/rejected": -333.5, | |
| "loss": 0.653, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": 0.02840423583984375, | |
| "rewards/margins": 0.085723876953125, | |
| "rewards/rejected": -0.057373046875, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.14336, | |
| "grad_norm": 7.218209771794371, | |
| "learning_rate": 6.961902017326311e-07, | |
| "logits/chosen": -1.14892578125, | |
| "logits/rejected": -1.22119140625, | |
| "logps/chosen": -290.5, | |
| "logps/rejected": -310.375, | |
| "loss": 0.6562, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": 0.0299224853515625, | |
| "rewards/margins": 0.07830810546875, | |
| "rewards/rejected": -0.0483551025390625, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.14464, | |
| "grad_norm": 7.635227167652353, | |
| "learning_rate": 6.959562587961234e-07, | |
| "logits/chosen": -1.14794921875, | |
| "logits/rejected": -1.17919921875, | |
| "logps/chosen": -305.5, | |
| "logps/rejected": -329.25, | |
| "loss": 0.658, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.0113372802734375, | |
| "rewards/margins": 0.076263427734375, | |
| "rewards/rejected": -0.06497573852539062, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.14592, | |
| "grad_norm": 7.086183935410638, | |
| "learning_rate": 6.957153872516586e-07, | |
| "logits/chosen": -1.1669921875, | |
| "logits/rejected": -1.236328125, | |
| "logps/chosen": -334.25, | |
| "logps/rejected": -324.25, | |
| "loss": 0.667, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": 0.0051097869873046875, | |
| "rewards/margins": 0.05683135986328125, | |
| "rewards/rejected": -0.05169677734375, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.1472, | |
| "grad_norm": 6.902888342176391, | |
| "learning_rate": 6.954675919232694e-07, | |
| "logits/chosen": -1.20703125, | |
| "logits/rejected": -1.23828125, | |
| "logps/chosen": -307.75, | |
| "logps/rejected": -292.125, | |
| "loss": 0.6636, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": 0.0019683837890625, | |
| "rewards/margins": 0.064910888671875, | |
| "rewards/rejected": -0.06283187866210938, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.14848, | |
| "grad_norm": 6.9983913393596735, | |
| "learning_rate": 6.95212877773655e-07, | |
| "logits/chosen": -1.1494140625, | |
| "logits/rejected": -1.16650390625, | |
| "logps/chosen": -316.25, | |
| "logps/rejected": -320.25, | |
| "loss": 0.6636, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.01812744140625, | |
| "rewards/margins": 0.06414794921875, | |
| "rewards/rejected": -0.082275390625, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.14976, | |
| "grad_norm": 7.039439125767687, | |
| "learning_rate": 6.949512499040799e-07, | |
| "logits/chosen": -1.205078125, | |
| "logits/rejected": -1.2041015625, | |
| "logps/chosen": -314.0, | |
| "logps/rejected": -309.75, | |
| "loss": 0.6702, | |
| "rewards/accuracies": 0.6640625, | |
| "rewards/chosen": -0.03998565673828125, | |
| "rewards/margins": 0.051842689514160156, | |
| "rewards/rejected": -0.091796875, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.15104, | |
| "grad_norm": 7.433508336249601, | |
| "learning_rate": 6.946827135542728e-07, | |
| "logits/chosen": -1.09521484375, | |
| "logits/rejected": -1.140625, | |
| "logps/chosen": -310.25, | |
| "logps/rejected": -334.625, | |
| "loss": 0.6686, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": -0.051239013671875, | |
| "rewards/margins": 0.05425071716308594, | |
| "rewards/rejected": -0.105438232421875, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.15232, | |
| "grad_norm": 7.340968519858034, | |
| "learning_rate": 6.944072741023215e-07, | |
| "logits/chosen": -1.1787109375, | |
| "logits/rejected": -1.2099609375, | |
| "logps/chosen": -325.75, | |
| "logps/rejected": -330.5, | |
| "loss": 0.6539, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -0.0429840087890625, | |
| "rewards/margins": 0.084930419921875, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.1536, | |
| "grad_norm": 7.136653505773104, | |
| "learning_rate": 6.941249370645649e-07, | |
| "logits/chosen": -1.1865234375, | |
| "logits/rejected": -1.171875, | |
| "logps/chosen": -329.25, | |
| "logps/rejected": -323.75, | |
| "loss": 0.6649, | |
| "rewards/accuracies": 0.6484375, | |
| "rewards/chosen": -0.05987548828125, | |
| "rewards/margins": 0.06435394287109375, | |
| "rewards/rejected": -0.12432861328125, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.15488, | |
| "grad_norm": 7.355481505262627, | |
| "learning_rate": 6.938357080954826e-07, | |
| "logits/chosen": -1.05419921875, | |
| "logits/rejected": -1.076171875, | |
| "logps/chosen": -334.25, | |
| "logps/rejected": -338.25, | |
| "loss": 0.6473, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -0.03987884521484375, | |
| "rewards/margins": 0.09857177734375, | |
| "rewards/rejected": -0.1387939453125, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.15616, | |
| "grad_norm": 7.263830648748775, | |
| "learning_rate": 6.935395929875821e-07, | |
| "logits/chosen": -1.17333984375, | |
| "logits/rejected": -1.1708984375, | |
| "logps/chosen": -335.75, | |
| "logps/rejected": -332.0, | |
| "loss": 0.6578, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0830535888671875, | |
| "rewards/margins": 0.077911376953125, | |
| "rewards/rejected": -0.16094970703125, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.15744, | |
| "grad_norm": 8.404772457823935, | |
| "learning_rate": 6.932365976712819e-07, | |
| "logits/chosen": -1.09716796875, | |
| "logits/rejected": -1.13525390625, | |
| "logps/chosen": -315.875, | |
| "logps/rejected": -359.0, | |
| "loss": 0.6414, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.076934814453125, | |
| "rewards/margins": 0.115936279296875, | |
| "rewards/rejected": -0.19287109375, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.15872, | |
| "grad_norm": 7.621326463499134, | |
| "learning_rate": 6.929267282147936e-07, | |
| "logits/chosen": -1.07373046875, | |
| "logits/rejected": -1.11767578125, | |
| "logps/chosen": -344.125, | |
| "logps/rejected": -355.0, | |
| "loss": 0.6457, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -0.08642578125, | |
| "rewards/margins": 0.1038818359375, | |
| "rewards/rejected": -0.1903076171875, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 7.788922626286128, | |
| "learning_rate": 6.926099908240002e-07, | |
| "logits/chosen": -1.142578125, | |
| "logits/rejected": -1.1640625, | |
| "logps/chosen": -328.5, | |
| "logps/rejected": -356.75, | |
| "loss": 0.6583, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.102294921875, | |
| "rewards/margins": 0.0783233642578125, | |
| "rewards/rejected": -0.18048095703125, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.16128, | |
| "grad_norm": 7.473164766096368, | |
| "learning_rate": 6.922863918423311e-07, | |
| "logits/chosen": -1.12744140625, | |
| "logits/rejected": -1.1103515625, | |
| "logps/chosen": -343.25, | |
| "logps/rejected": -337.75, | |
| "loss": 0.6575, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.12322998046875, | |
| "rewards/margins": 0.080108642578125, | |
| "rewards/rejected": -0.2030029296875, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.16256, | |
| "grad_norm": 7.64697192150379, | |
| "learning_rate": 6.919559377506359e-07, | |
| "logits/chosen": -1.177734375, | |
| "logits/rejected": -1.21484375, | |
| "logps/chosen": -333.5, | |
| "logps/rejected": -354.75, | |
| "loss": 0.6485, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -0.13275146484375, | |
| "rewards/margins": 0.10205078125, | |
| "rewards/rejected": -0.2347412109375, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.16384, | |
| "grad_norm": 7.253778836602147, | |
| "learning_rate": 6.916186351670546e-07, | |
| "logits/chosen": -1.12353515625, | |
| "logits/rejected": -1.16650390625, | |
| "logps/chosen": -318.625, | |
| "logps/rejected": -313.5, | |
| "loss": 0.6458, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.16357421875, | |
| "rewards/margins": 0.106414794921875, | |
| "rewards/rejected": -0.2698974609375, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.16512, | |
| "grad_norm": 7.447630373123696, | |
| "learning_rate": 6.91274490846884e-07, | |
| "logits/chosen": -1.12158203125, | |
| "logits/rejected": -1.10791015625, | |
| "logps/chosen": -333.75, | |
| "logps/rejected": -306.625, | |
| "loss": 0.6538, | |
| "rewards/accuracies": 0.6640625, | |
| "rewards/chosen": -0.17535400390625, | |
| "rewards/margins": 0.088592529296875, | |
| "rewards/rejected": -0.263916015625, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.1664, | |
| "grad_norm": 7.4679011809033815, | |
| "learning_rate": 6.90923511682444e-07, | |
| "logits/chosen": -1.1396484375, | |
| "logits/rejected": -1.171875, | |
| "logps/chosen": -331.75, | |
| "logps/rejected": -361.5, | |
| "loss": 0.6447, | |
| "rewards/accuracies": 0.6640625, | |
| "rewards/chosen": -0.19451904296875, | |
| "rewards/margins": 0.112060546875, | |
| "rewards/rejected": -0.3065185546875, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.16768, | |
| "grad_norm": 7.362515597925838, | |
| "learning_rate": 6.905657047029384e-07, | |
| "logits/chosen": -1.087890625, | |
| "logits/rejected": -1.1240234375, | |
| "logps/chosen": -324.75, | |
| "logps/rejected": -308.875, | |
| "loss": 0.6387, | |
| "rewards/accuracies": 0.6796875, | |
| "rewards/chosen": -0.2166748046875, | |
| "rewards/margins": 0.126708984375, | |
| "rewards/rejected": -0.34326171875, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.16896, | |
| "grad_norm": 7.770937607646877, | |
| "learning_rate": 6.90201077074314e-07, | |
| "logits/chosen": -1.1044921875, | |
| "logits/rejected": -1.1640625, | |
| "logps/chosen": -329.25, | |
| "logps/rejected": -347.25, | |
| "loss": 0.6436, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -0.2451171875, | |
| "rewards/margins": 0.11639404296875, | |
| "rewards/rejected": -0.36181640625, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.17024, | |
| "grad_norm": 7.943995279771987, | |
| "learning_rate": 6.898296360991182e-07, | |
| "logits/chosen": -1.10205078125, | |
| "logits/rejected": -1.13330078125, | |
| "logps/chosen": -356.25, | |
| "logps/rejected": -357.25, | |
| "loss": 0.6337, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -0.2764892578125, | |
| "rewards/margins": 0.13845062255859375, | |
| "rewards/rejected": -0.414794921875, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.17152, | |
| "grad_norm": 8.15643307389567, | |
| "learning_rate": 6.894513892163518e-07, | |
| "logits/chosen": -1.0361328125, | |
| "logits/rejected": -1.05517578125, | |
| "logps/chosen": -372.0, | |
| "logps/rejected": -356.75, | |
| "loss": 0.6553, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": -0.291015625, | |
| "rewards/margins": 0.0922698974609375, | |
| "rewards/rejected": -0.38330078125, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.1728, | |
| "grad_norm": 8.402149118568568, | |
| "learning_rate": 6.890663440013204e-07, | |
| "logits/chosen": -1.07666015625, | |
| "logits/rejected": -1.1103515625, | |
| "logps/chosen": -367.25, | |
| "logps/rejected": -380.5, | |
| "loss": 0.636, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3759765625, | |
| "rewards/margins": 0.14380645751953125, | |
| "rewards/rejected": -0.519287109375, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.17408, | |
| "grad_norm": 7.676163099722455, | |
| "learning_rate": 6.886745081654823e-07, | |
| "logits/chosen": -1.06005859375, | |
| "logits/rejected": -1.076171875, | |
| "logps/chosen": -365.25, | |
| "logps/rejected": -364.125, | |
| "loss": 0.6389, | |
| "rewards/accuracies": 0.6640625, | |
| "rewards/chosen": -0.3699951171875, | |
| "rewards/margins": 0.129241943359375, | |
| "rewards/rejected": -0.498779296875, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.17536, | |
| "grad_norm": 7.792721927404631, | |
| "learning_rate": 6.882758895562949e-07, | |
| "logits/chosen": -0.97998046875, | |
| "logits/rejected": -1.05126953125, | |
| "logps/chosen": -360.25, | |
| "logps/rejected": -366.25, | |
| "loss": 0.6345, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -0.42626953125, | |
| "rewards/margins": 0.145538330078125, | |
| "rewards/rejected": -0.572021484375, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.17664, | |
| "grad_norm": 13.220416758370622, | |
| "learning_rate": 6.878704961570564e-07, | |
| "logits/chosen": -0.9208984375, | |
| "logits/rejected": -0.9052734375, | |
| "logps/chosen": -427.75, | |
| "logps/rejected": -412.75, | |
| "loss": 0.6566, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.642822265625, | |
| "rewards/margins": 0.10161972045898438, | |
| "rewards/rejected": -0.744140625, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.17792, | |
| "grad_norm": 8.66931437809148, | |
| "learning_rate": 6.874583360867468e-07, | |
| "logits/chosen": -0.8505859375, | |
| "logits/rejected": -0.88330078125, | |
| "logps/chosen": -399.0, | |
| "logps/rejected": -415.0, | |
| "loss": 0.6271, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.6767578125, | |
| "rewards/margins": 0.1732177734375, | |
| "rewards/rejected": -0.849609375, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.1792, | |
| "grad_norm": 8.526440571507557, | |
| "learning_rate": 6.87039417599865e-07, | |
| "logits/chosen": -0.875, | |
| "logits/rejected": -0.91796875, | |
| "logps/chosen": -376.25, | |
| "logps/rejected": -390.25, | |
| "loss": 0.6212, | |
| "rewards/accuracies": 0.6796875, | |
| "rewards/chosen": -0.6337890625, | |
| "rewards/margins": 0.18011474609375, | |
| "rewards/rejected": -0.8134765625, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.18048, | |
| "grad_norm": 9.392422913412458, | |
| "learning_rate": 6.866137490862636e-07, | |
| "logits/chosen": -0.78857421875, | |
| "logits/rejected": -0.8232421875, | |
| "logps/chosen": -392.25, | |
| "logps/rejected": -399.25, | |
| "loss": 0.6189, | |
| "rewards/accuracies": 0.6640625, | |
| "rewards/chosen": -0.6796875, | |
| "rewards/margins": 0.19610595703125, | |
| "rewards/rejected": -0.87646484375, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.18176, | |
| "grad_norm": 9.326150506259697, | |
| "learning_rate": 6.861813390709803e-07, | |
| "logits/chosen": -0.71630859375, | |
| "logits/rejected": -0.78125, | |
| "logps/chosen": -369.5, | |
| "logps/rejected": -404.25, | |
| "loss": 0.6118, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.7138671875, | |
| "rewards/margins": 0.21263885498046875, | |
| "rewards/rejected": -0.92529296875, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.18304, | |
| "grad_norm": 8.384403124201002, | |
| "learning_rate": 6.857421962140681e-07, | |
| "logits/chosen": -0.8466796875, | |
| "logits/rejected": -0.87451171875, | |
| "logps/chosen": -408.5, | |
| "logps/rejected": -402.0, | |
| "loss": 0.6247, | |
| "rewards/accuracies": 0.6640625, | |
| "rewards/chosen": -0.72265625, | |
| "rewards/margins": 0.19293212890625, | |
| "rewards/rejected": -0.91552734375, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.18432, | |
| "grad_norm": 9.388640196159205, | |
| "learning_rate": 6.852963293104211e-07, | |
| "logits/chosen": -0.94677734375, | |
| "logits/rejected": -0.9345703125, | |
| "logps/chosen": -389.25, | |
| "logps/rejected": -378.0, | |
| "loss": 0.621, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.6689453125, | |
| "rewards/margins": 0.1993408203125, | |
| "rewards/rejected": -0.86865234375, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1856, | |
| "grad_norm": 11.03326645275443, | |
| "learning_rate": 6.848437472895988e-07, | |
| "logits/chosen": -0.9150390625, | |
| "logits/rejected": -1.00146484375, | |
| "logps/chosen": -386.75, | |
| "logps/rejected": -431.25, | |
| "loss": 0.5984, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -0.642578125, | |
| "rewards/margins": 0.234375, | |
| "rewards/rejected": -0.876953125, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.18688, | |
| "grad_norm": 61.60360857590808, | |
| "learning_rate": 6.843844592156471e-07, | |
| "logits/chosen": -0.83984375, | |
| "logits/rejected": -0.96044921875, | |
| "logps/chosen": -348.5, | |
| "logps/rejected": -454.75, | |
| "loss": 0.6328, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -0.7197265625, | |
| "rewards/margins": 0.17962646484375, | |
| "rewards/rejected": -0.89892578125, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.18816, | |
| "grad_norm": 15.422965574908865, | |
| "learning_rate": 6.839184742869166e-07, | |
| "logits/chosen": -0.86328125, | |
| "logits/rejected": -0.85986328125, | |
| "logps/chosen": -401.25, | |
| "logps/rejected": -429.5, | |
| "loss": 0.5783, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -0.79638671875, | |
| "rewards/margins": 0.29449462890625, | |
| "rewards/rejected": -1.08984375, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.18944, | |
| "grad_norm": 13.374494698966174, | |
| "learning_rate": 6.834458018358787e-07, | |
| "logits/chosen": -0.8583984375, | |
| "logits/rejected": -0.83056640625, | |
| "logps/chosen": -438.25, | |
| "logps/rejected": -396.25, | |
| "loss": 0.6247, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -0.9375, | |
| "rewards/margins": 0.221160888671875, | |
| "rewards/rejected": -1.15869140625, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.19072, | |
| "grad_norm": 10.50579381133434, | |
| "learning_rate": 6.829664513289386e-07, | |
| "logits/chosen": -0.6806640625, | |
| "logits/rejected": -0.696533203125, | |
| "logps/chosen": -429.25, | |
| "logps/rejected": -445.0, | |
| "loss": 0.6204, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.02392578125, | |
| "rewards/margins": 0.21826171875, | |
| "rewards/rejected": -1.24072265625, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 8.912036503648771, | |
| "learning_rate": 6.824804323662456e-07, | |
| "logits/chosen": -0.833984375, | |
| "logits/rejected": -0.85009765625, | |
| "logps/chosen": -450.5, | |
| "logps/rejected": -462.75, | |
| "loss": 0.6157, | |
| "rewards/accuracies": 0.6640625, | |
| "rewards/chosen": -1.15869140625, | |
| "rewards/margins": 0.23162841796875, | |
| "rewards/rejected": -1.3916015625, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.19328, | |
| "grad_norm": 8.776101733274905, | |
| "learning_rate": 6.819877546815008e-07, | |
| "logits/chosen": -0.8564453125, | |
| "logits/rejected": -0.875, | |
| "logps/chosen": -438.25, | |
| "logps/rejected": -447.25, | |
| "loss": 0.6202, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -1.07470703125, | |
| "rewards/margins": 0.23724365234375, | |
| "rewards/rejected": -1.3115234375, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.19456, | |
| "grad_norm": 10.578778147443705, | |
| "learning_rate": 6.814884281417626e-07, | |
| "logits/chosen": -0.8427734375, | |
| "logits/rejected": -0.8701171875, | |
| "logps/chosen": -434.75, | |
| "logps/rejected": -448.25, | |
| "loss": 0.591, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.03515625, | |
| "rewards/margins": 0.2916259765625, | |
| "rewards/rejected": -1.3251953125, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.19584, | |
| "grad_norm": 8.049518104374286, | |
| "learning_rate": 6.809824627472483e-07, | |
| "logits/chosen": -0.8037109375, | |
| "logits/rejected": -0.8603515625, | |
| "logps/chosen": -419.5, | |
| "logps/rejected": -428.5, | |
| "loss": 0.5812, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.02392578125, | |
| "rewards/margins": 0.31982421875, | |
| "rewards/rejected": -1.34375, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.19712, | |
| "grad_norm": 8.793213481082436, | |
| "learning_rate": 6.804698686311346e-07, | |
| "logits/chosen": -0.87255859375, | |
| "logits/rejected": -0.85888671875, | |
| "logps/chosen": -437.75, | |
| "logps/rejected": -433.5, | |
| "loss": 0.6043, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -1.11376953125, | |
| "rewards/margins": 0.26483154296875, | |
| "rewards/rejected": -1.37939453125, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.1984, | |
| "grad_norm": 17.16055737002578, | |
| "learning_rate": 6.79950656059354e-07, | |
| "logits/chosen": -0.888427734375, | |
| "logits/rejected": -0.934326171875, | |
| "logps/chosen": -470.75, | |
| "logps/rejected": -460.25, | |
| "loss": 0.5672, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -1.1201171875, | |
| "rewards/margins": 0.353271484375, | |
| "rewards/rejected": -1.474609375, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.19968, | |
| "grad_norm": 8.561528637211719, | |
| "learning_rate": 6.794248354303899e-07, | |
| "logits/chosen": -0.80615234375, | |
| "logits/rejected": -0.83203125, | |
| "logps/chosen": -441.75, | |
| "logps/rejected": -466.25, | |
| "loss": 0.6081, | |
| "rewards/accuracies": 0.6484375, | |
| "rewards/chosen": -1.259765625, | |
| "rewards/margins": 0.260406494140625, | |
| "rewards/rejected": -1.5205078125, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.20096, | |
| "grad_norm": 8.69093146968424, | |
| "learning_rate": 6.788924172750679e-07, | |
| "logits/chosen": -0.87646484375, | |
| "logits/rejected": -0.9169921875, | |
| "logps/chosen": -437.5, | |
| "logps/rejected": -462.25, | |
| "loss": 0.5753, | |
| "rewards/accuracies": 0.6484375, | |
| "rewards/chosen": -1.1572265625, | |
| "rewards/margins": 0.3331298828125, | |
| "rewards/rejected": -1.48828125, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.20224, | |
| "grad_norm": 8.31733029290186, | |
| "learning_rate": 6.783534122563447e-07, | |
| "logits/chosen": -0.7666015625, | |
| "logits/rejected": -0.8388671875, | |
| "logps/chosen": -427.5, | |
| "logps/rejected": -477.0, | |
| "loss": 0.5649, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.2607421875, | |
| "rewards/margins": 0.38958740234375, | |
| "rewards/rejected": -1.6484375, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.20352, | |
| "grad_norm": 11.171939635720042, | |
| "learning_rate": 6.77807831169095e-07, | |
| "logits/chosen": -0.802734375, | |
| "logits/rejected": -0.86181640625, | |
| "logps/chosen": -476.75, | |
| "logps/rejected": -498.0, | |
| "loss": 0.5918, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -1.4375, | |
| "rewards/margins": 0.305908203125, | |
| "rewards/rejected": -1.7431640625, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.2048, | |
| "grad_norm": 8.281097141476181, | |
| "learning_rate": 6.772556849398952e-07, | |
| "logits/chosen": -0.843994140625, | |
| "logits/rejected": -0.884765625, | |
| "logps/chosen": -505.75, | |
| "logps/rejected": -551.75, | |
| "loss": 0.5537, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -1.4814453125, | |
| "rewards/margins": 0.419677734375, | |
| "rewards/rejected": -1.9033203125, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.20608, | |
| "grad_norm": 10.239198232842558, | |
| "learning_rate": 6.766969846268044e-07, | |
| "logits/chosen": -0.7734375, | |
| "logits/rejected": -0.830078125, | |
| "logps/chosen": -459.5, | |
| "logps/rejected": -488.75, | |
| "loss": 0.6061, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -1.57373046875, | |
| "rewards/margins": 0.25689697265625, | |
| "rewards/rejected": -1.830078125, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.20736, | |
| "grad_norm": 9.747342835599794, | |
| "learning_rate": 6.761317414191428e-07, | |
| "logits/chosen": -0.8076171875, | |
| "logits/rejected": -0.8447265625, | |
| "logps/chosen": -483.75, | |
| "logps/rejected": -545.25, | |
| "loss": 0.5981, | |
| "rewards/accuracies": 0.6796875, | |
| "rewards/chosen": -1.779296875, | |
| "rewards/margins": 0.2919921875, | |
| "rewards/rejected": -2.072265625, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.20864, | |
| "grad_norm": 9.525902367060457, | |
| "learning_rate": 6.755599666372684e-07, | |
| "logits/chosen": -0.80859375, | |
| "logits/rejected": -0.8984375, | |
| "logps/chosen": -454.5, | |
| "logps/rejected": -502.25, | |
| "loss": 0.582, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -1.671875, | |
| "rewards/margins": 0.3372802734375, | |
| "rewards/rejected": -2.0078125, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.20992, | |
| "grad_norm": 10.81029592784359, | |
| "learning_rate": 6.749816717323492e-07, | |
| "logits/chosen": -0.7578125, | |
| "logits/rejected": -0.8056640625, | |
| "logps/chosen": -518.75, | |
| "logps/rejected": -571.0, | |
| "loss": 0.5573, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -1.8740234375, | |
| "rewards/margins": 0.4305419921875, | |
| "rewards/rejected": -2.3046875, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2112, | |
| "grad_norm": 8.72875920033335, | |
| "learning_rate": 6.743968682861345e-07, | |
| "logits/chosen": -0.75830078125, | |
| "logits/rejected": -0.82470703125, | |
| "logps/chosen": -456.5, | |
| "logps/rejected": -518.25, | |
| "loss": 0.5457, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -1.642578125, | |
| "rewards/margins": 0.4796142578125, | |
| "rewards/rejected": -2.123046875, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.21248, | |
| "grad_norm": 12.52887713385305, | |
| "learning_rate": 6.738055680107232e-07, | |
| "logits/chosen": -0.8310546875, | |
| "logits/rejected": -0.83740234375, | |
| "logps/chosen": -502.75, | |
| "logps/rejected": -525.25, | |
| "loss": 0.6151, | |
| "rewards/accuracies": 0.6015625, | |
| "rewards/chosen": -1.794921875, | |
| "rewards/margins": 0.2906494140625, | |
| "rewards/rejected": -2.083984375, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.21376, | |
| "grad_norm": 12.573683619458626, | |
| "learning_rate": 6.732077827483283e-07, | |
| "logits/chosen": -0.79052734375, | |
| "logits/rejected": -0.7919921875, | |
| "logps/chosen": -510.75, | |
| "logps/rejected": -537.5, | |
| "loss": 0.6154, | |
| "rewards/accuracies": 0.6796875, | |
| "rewards/chosen": -1.8447265625, | |
| "rewards/margins": 0.3406219482421875, | |
| "rewards/rejected": -2.185546875, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.21504, | |
| "grad_norm": 8.275606690031259, | |
| "learning_rate": 6.726035244710405e-07, | |
| "logits/chosen": -0.796142578125, | |
| "logits/rejected": -0.814453125, | |
| "logps/chosen": -465.5, | |
| "logps/rejected": -497.0, | |
| "loss": 0.5702, | |
| "rewards/accuracies": 0.6640625, | |
| "rewards/chosen": -1.51953125, | |
| "rewards/margins": 0.4149169921875, | |
| "rewards/rejected": -1.93359375, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.21632, | |
| "grad_norm": 10.111116876816661, | |
| "learning_rate": 6.719928052805885e-07, | |
| "logits/chosen": -0.83642578125, | |
| "logits/rejected": -0.841796875, | |
| "logps/chosen": -464.0, | |
| "logps/rejected": -512.75, | |
| "loss": 0.5484, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.36328125, | |
| "rewards/margins": 0.4599609375, | |
| "rewards/rejected": -1.82421875, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.2176, | |
| "grad_norm": 14.001498683826862, | |
| "learning_rate": 6.713756374080959e-07, | |
| "logits/chosen": -0.8994140625, | |
| "logits/rejected": -0.9580078125, | |
| "logps/chosen": -458.75, | |
| "logps/rejected": -498.5, | |
| "loss": 0.5747, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -1.2998046875, | |
| "rewards/margins": 0.36627197265625, | |
| "rewards/rejected": -1.6669921875, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.21888, | |
| "grad_norm": 13.154660100000475, | |
| "learning_rate": 6.70752033213837e-07, | |
| "logits/chosen": -0.88525390625, | |
| "logits/rejected": -0.93603515625, | |
| "logps/chosen": -467.0, | |
| "logps/rejected": -471.5, | |
| "loss": 0.5537, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -1.3125, | |
| "rewards/margins": 0.42266845703125, | |
| "rewards/rejected": -1.7353515625, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.22016, | |
| "grad_norm": 11.432368493570536, | |
| "learning_rate": 6.70122005186989e-07, | |
| "logits/chosen": -0.79443359375, | |
| "logits/rejected": -0.85107421875, | |
| "logps/chosen": -481.5, | |
| "logps/rejected": -537.0, | |
| "loss": 0.5738, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -1.56640625, | |
| "rewards/margins": 0.3822021484375, | |
| "rewards/rejected": -1.9443359375, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.22144, | |
| "grad_norm": 9.822098330205291, | |
| "learning_rate": 6.694855659453818e-07, | |
| "logits/chosen": -0.84765625, | |
| "logits/rejected": -0.84912109375, | |
| "logps/chosen": -511.75, | |
| "logps/rejected": -511.5, | |
| "loss": 0.5899, | |
| "rewards/accuracies": 0.6640625, | |
| "rewards/chosen": -1.70703125, | |
| "rewards/margins": 0.35321044921875, | |
| "rewards/rejected": -2.0595703125, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.22272, | |
| "grad_norm": 15.790896268251576, | |
| "learning_rate": 6.688427282352449e-07, | |
| "logits/chosen": -0.755859375, | |
| "logits/rejected": -0.80126953125, | |
| "logps/chosen": -492.25, | |
| "logps/rejected": -520.5, | |
| "loss": 0.5308, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -1.7265625, | |
| "rewards/margins": 0.4969482421875, | |
| "rewards/rejected": -2.2255859375, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 10.091430688703294, | |
| "learning_rate": 6.681935049309533e-07, | |
| "logits/chosen": -0.601318359375, | |
| "logits/rejected": -0.64306640625, | |
| "logps/chosen": -560.0, | |
| "logps/rejected": -612.5, | |
| "loss": 0.5713, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -2.1953125, | |
| "rewards/margins": 0.515625, | |
| "rewards/rejected": -2.7109375, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.22528, | |
| "grad_norm": 9.197272514290859, | |
| "learning_rate": 6.675379090347682e-07, | |
| "logits/chosen": -0.64501953125, | |
| "logits/rejected": -0.674560546875, | |
| "logps/chosen": -608.75, | |
| "logps/rejected": -654.0, | |
| "loss": 0.5305, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -2.6953125, | |
| "rewards/margins": 0.660400390625, | |
| "rewards/rejected": -3.35546875, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.22656, | |
| "grad_norm": 9.460572041512634, | |
| "learning_rate": 6.668759536765779e-07, | |
| "logits/chosen": -0.6484375, | |
| "logits/rejected": -0.671630859375, | |
| "logps/chosen": -600.5, | |
| "logps/rejected": -642.0, | |
| "loss": 0.567, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -2.6953125, | |
| "rewards/margins": 0.5928955078125, | |
| "rewards/rejected": -3.291015625, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.22784, | |
| "grad_norm": 40.385948070557, | |
| "learning_rate": 6.662076521136337e-07, | |
| "logits/chosen": -0.5048828125, | |
| "logits/rejected": -0.554443359375, | |
| "logps/chosen": -604.0, | |
| "logps/rejected": -661.75, | |
| "loss": 0.5893, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -2.9296875, | |
| "rewards/margins": 0.66015625, | |
| "rewards/rejected": -3.58984375, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.22912, | |
| "grad_norm": 22.98261625779329, | |
| "learning_rate": 6.655330177302857e-07, | |
| "logits/chosen": -0.63525390625, | |
| "logits/rejected": -0.71435546875, | |
| "logps/chosen": -649.5, | |
| "logps/rejected": -725.0, | |
| "loss": 0.5748, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.234375, | |
| "rewards/margins": 0.593994140625, | |
| "rewards/rejected": -3.830078125, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.2304, | |
| "grad_norm": 8.847574852157168, | |
| "learning_rate": 6.64852064037713e-07, | |
| "logits/chosen": -0.6103515625, | |
| "logits/rejected": -0.654296875, | |
| "logps/chosen": -607.0, | |
| "logps/rejected": -687.0, | |
| "loss": 0.5131, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.97265625, | |
| "rewards/margins": 0.7249755859375, | |
| "rewards/rejected": -3.6953125, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.23168, | |
| "grad_norm": 9.669090706723097, | |
| "learning_rate": 6.641648046736549e-07, | |
| "logits/chosen": -0.62060546875, | |
| "logits/rejected": -0.67236328125, | |
| "logps/chosen": -644.5, | |
| "logps/rejected": -689.5, | |
| "loss": 0.5648, | |
| "rewards/accuracies": 0.6796875, | |
| "rewards/chosen": -3.06640625, | |
| "rewards/margins": 0.6990966796875, | |
| "rewards/rejected": -3.765625, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.23296, | |
| "grad_norm": 9.666414731608148, | |
| "learning_rate": 6.634712534021367e-07, | |
| "logits/chosen": -0.584716796875, | |
| "logits/rejected": -0.6240234375, | |
| "logps/chosen": -589.0, | |
| "logps/rejected": -635.5, | |
| "loss": 0.5225, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -2.626953125, | |
| "rewards/margins": 0.66064453125, | |
| "rewards/rejected": -3.291015625, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.23424, | |
| "grad_norm": 9.206989504196308, | |
| "learning_rate": 6.627714241131942e-07, | |
| "logits/chosen": -0.568115234375, | |
| "logits/rejected": -0.587158203125, | |
| "logps/chosen": -609.25, | |
| "logps/rejected": -633.5, | |
| "loss": 0.5513, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -2.73828125, | |
| "rewards/margins": 0.598876953125, | |
| "rewards/rejected": -3.333984375, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.23552, | |
| "grad_norm": 39.055918050847936, | |
| "learning_rate": 6.620653308225959e-07, | |
| "logits/chosen": -0.563232421875, | |
| "logits/rejected": -0.642578125, | |
| "logps/chosen": -568.5, | |
| "logps/rejected": -614.5, | |
| "loss": 0.6409, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -2.60546875, | |
| "rewards/margins": 0.454833984375, | |
| "rewards/rejected": -3.060546875, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2368, | |
| "grad_norm": 15.55460973147395, | |
| "learning_rate": 6.613529876715619e-07, | |
| "logits/chosen": -0.669189453125, | |
| "logits/rejected": -0.71875, | |
| "logps/chosen": -591.5, | |
| "logps/rejected": -631.0, | |
| "loss": 0.529, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -2.4326171875, | |
| "rewards/margins": 0.650634765625, | |
| "rewards/rejected": -3.080078125, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.23808, | |
| "grad_norm": 8.09681757649616, | |
| "learning_rate": 6.606344089264805e-07, | |
| "logits/chosen": -0.70849609375, | |
| "logits/rejected": -0.7412109375, | |
| "logps/chosen": -568.25, | |
| "logps/rejected": -573.5, | |
| "loss": 0.5365, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -2.1474609375, | |
| "rewards/margins": 0.56982421875, | |
| "rewards/rejected": -2.716796875, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.23936, | |
| "grad_norm": 8.312763295596206, | |
| "learning_rate": 6.599096089786234e-07, | |
| "logits/chosen": -0.8388671875, | |
| "logits/rejected": -0.873046875, | |
| "logps/chosen": -519.5, | |
| "logps/rejected": -573.25, | |
| "loss": 0.5779, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -2.048828125, | |
| "rewards/margins": 0.4176025390625, | |
| "rewards/rejected": -2.4677734375, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.24064, | |
| "grad_norm": 8.587134768192069, | |
| "learning_rate": 6.591786023438564e-07, | |
| "logits/chosen": -0.638519287109375, | |
| "logits/rejected": -0.7158203125, | |
| "logps/chosen": -489.75, | |
| "logps/rejected": -564.75, | |
| "loss": 0.5731, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -1.8544921875, | |
| "rewards/margins": 0.45361328125, | |
| "rewards/rejected": -2.3056640625, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.24192, | |
| "grad_norm": 13.116768076581428, | |
| "learning_rate": 6.584414036623496e-07, | |
| "logits/chosen": -0.89697265625, | |
| "logits/rejected": -0.98388671875, | |
| "logps/chosen": -494.0, | |
| "logps/rejected": -540.0, | |
| "loss": 0.5309, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.63671875, | |
| "rewards/margins": 0.5660400390625, | |
| "rewards/rejected": -2.203125, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.2432, | |
| "grad_norm": 9.963354836403468, | |
| "learning_rate": 6.576980276982832e-07, | |
| "logits/chosen": -0.82666015625, | |
| "logits/rejected": -0.91162109375, | |
| "logps/chosen": -469.5, | |
| "logps/rejected": -498.25, | |
| "loss": 0.5733, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -1.67578125, | |
| "rewards/margins": 0.40509033203125, | |
| "rewards/rejected": -2.08203125, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.24448, | |
| "grad_norm": 9.679252853242984, | |
| "learning_rate": 6.569484893395527e-07, | |
| "logits/chosen": -0.92138671875, | |
| "logits/rejected": -0.974609375, | |
| "logps/chosen": -504.75, | |
| "logps/rejected": -556.5, | |
| "loss": 0.559, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.8671875, | |
| "rewards/margins": 0.467041015625, | |
| "rewards/rejected": -2.333984375, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.24576, | |
| "grad_norm": 11.180357368507915, | |
| "learning_rate": 6.561928035974705e-07, | |
| "logits/chosen": -0.875, | |
| "logits/rejected": -0.89599609375, | |
| "logps/chosen": -547.25, | |
| "logps/rejected": -599.25, | |
| "loss": 0.5339, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -2.015625, | |
| "rewards/margins": 0.6204833984375, | |
| "rewards/rejected": -2.634765625, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.24704, | |
| "grad_norm": 11.453973983300482, | |
| "learning_rate": 6.55430985606465e-07, | |
| "logits/chosen": -0.84228515625, | |
| "logits/rejected": -0.87890625, | |
| "logps/chosen": -550.25, | |
| "logps/rejected": -584.5, | |
| "loss": 0.5265, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -2.18359375, | |
| "rewards/margins": 0.672607421875, | |
| "rewards/rejected": -2.857421875, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.24832, | |
| "grad_norm": 19.413502192585955, | |
| "learning_rate": 6.546630506237778e-07, | |
| "logits/chosen": -0.7080078125, | |
| "logits/rejected": -0.7685546875, | |
| "logps/chosen": -565.0, | |
| "logps/rejected": -626.75, | |
| "loss": 0.5264, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.2333984375, | |
| "rewards/margins": 0.642578125, | |
| "rewards/rejected": -2.873046875, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.2496, | |
| "grad_norm": 11.99148623186324, | |
| "learning_rate": 6.538890140291578e-07, | |
| "logits/chosen": -0.73046875, | |
| "logits/rejected": -0.75048828125, | |
| "logps/chosen": -602.25, | |
| "logps/rejected": -628.75, | |
| "loss": 0.5333, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.5947265625, | |
| "rewards/margins": 0.683349609375, | |
| "rewards/rejected": -3.27734375, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.25088, | |
| "grad_norm": 10.31797612543643, | |
| "learning_rate": 6.531088913245536e-07, | |
| "logits/chosen": -0.64794921875, | |
| "logits/rejected": -0.716796875, | |
| "logps/chosen": -635.75, | |
| "logps/rejected": -716.0, | |
| "loss": 0.4923, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -3.15625, | |
| "rewards/margins": 0.8427734375, | |
| "rewards/rejected": -3.99609375, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.25216, | |
| "grad_norm": 87.52079256871755, | |
| "learning_rate": 6.523226981338026e-07, | |
| "logits/chosen": -0.68359375, | |
| "logits/rejected": -0.70458984375, | |
| "logps/chosen": -738.0, | |
| "logps/rejected": -774.0, | |
| "loss": 0.6523, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -4.09765625, | |
| "rewards/margins": 0.696044921875, | |
| "rewards/rejected": -4.794921875, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.25344, | |
| "grad_norm": 72.5293740432998, | |
| "learning_rate": 6.515304502023185e-07, | |
| "logits/chosen": -0.5673828125, | |
| "logits/rejected": -0.601318359375, | |
| "logps/chosen": -806.5, | |
| "logps/rejected": -903.0, | |
| "loss": 0.5506, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.96484375, | |
| "rewards/margins": 1.0079345703125, | |
| "rewards/rejected": -5.97265625, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.25472, | |
| "grad_norm": 44.679991429690986, | |
| "learning_rate": 6.507321633967758e-07, | |
| "logits/chosen": -0.649658203125, | |
| "logits/rejected": -0.6904296875, | |
| "logps/chosen": -837.0, | |
| "logps/rejected": -915.0, | |
| "loss": 0.499, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -4.8671875, | |
| "rewards/margins": 1.01953125, | |
| "rewards/rejected": -5.88671875, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 59.397830971941254, | |
| "learning_rate": 6.499278537047919e-07, | |
| "logits/chosen": -0.69677734375, | |
| "logits/rejected": -0.72998046875, | |
| "logps/chosen": -831.0, | |
| "logps/rejected": -920.0, | |
| "loss": 0.5369, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -5.015625, | |
| "rewards/margins": 0.87841796875, | |
| "rewards/rejected": -5.8828125, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "eval_logits/chosen": -0.6611328125, | |
| "eval_logits/rejected": -0.733642578125, | |
| "eval_logps/chosen": -767.75, | |
| "eval_logps/rejected": -825.0, | |
| "eval_loss": 0.6021875143051147, | |
| "eval_rewards/accuracies": 0.69921875, | |
| "eval_rewards/chosen": -4.474609375, | |
| "eval_rewards/margins": 0.75811767578125, | |
| "eval_rewards/rejected": -5.23046875, | |
| "eval_runtime": 27.6962, | |
| "eval_samples_per_second": 18.053, | |
| "eval_steps_per_second": 0.578, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.25728, | |
| "grad_norm": 53.24908848058674, | |
| "learning_rate": 6.491175372346071e-07, | |
| "logits/chosen": -0.671875, | |
| "logits/rejected": -0.701171875, | |
| "logps/chosen": -777.5, | |
| "logps/rejected": -874.5, | |
| "loss": 0.5742, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -4.5703125, | |
| "rewards/margins": 0.83984375, | |
| "rewards/rejected": -5.41015625, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.25856, | |
| "grad_norm": 96.55947068044792, | |
| "learning_rate": 6.483012302147617e-07, | |
| "logits/chosen": -0.691162109375, | |
| "logits/rejected": -0.73388671875, | |
| "logps/chosen": -718.0, | |
| "logps/rejected": -742.0, | |
| "loss": 0.715, | |
| "rewards/accuracies": 0.6171875, | |
| "rewards/chosen": -3.8984375, | |
| "rewards/margins": 0.52960205078125, | |
| "rewards/rejected": -4.42578125, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.25984, | |
| "grad_norm": 11.651199302119567, | |
| "learning_rate": 6.474789489937715e-07, | |
| "logits/chosen": -0.7294921875, | |
| "logits/rejected": -0.79931640625, | |
| "logps/chosen": -654.0, | |
| "logps/rejected": -739.0, | |
| "loss": 0.4917, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.275390625, | |
| "rewards/margins": 0.88525390625, | |
| "rewards/rejected": -4.16015625, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.26112, | |
| "grad_norm": 22.98647680803771, | |
| "learning_rate": 6.466507100397998e-07, | |
| "logits/chosen": -0.763671875, | |
| "logits/rejected": -0.79931640625, | |
| "logps/chosen": -605.0, | |
| "logps/rejected": -657.5, | |
| "loss": 0.5948, | |
| "rewards/accuracies": 0.6484375, | |
| "rewards/chosen": -2.662109375, | |
| "rewards/margins": 0.5657958984375, | |
| "rewards/rejected": -3.23046875, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2624, | |
| "grad_norm": 8.334836762281965, | |
| "learning_rate": 6.458165299403282e-07, | |
| "logits/chosen": -0.702880859375, | |
| "logits/rejected": -0.7275390625, | |
| "logps/chosen": -576.0, | |
| "logps/rejected": -620.5, | |
| "loss": 0.5457, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.412109375, | |
| "rewards/margins": 0.678466796875, | |
| "rewards/rejected": -3.08984375, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.26368, | |
| "grad_norm": 8.43068729699347, | |
| "learning_rate": 6.449764254018236e-07, | |
| "logits/chosen": -0.8203125, | |
| "logits/rejected": -0.89306640625, | |
| "logps/chosen": -557.5, | |
| "logps/rejected": -619.5, | |
| "loss": 0.5399, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -2.326171875, | |
| "rewards/margins": 0.57373046875, | |
| "rewards/rejected": -2.8984375, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.26496, | |
| "grad_norm": 15.681934350374608, | |
| "learning_rate": 6.441304132494045e-07, | |
| "logits/chosen": -0.89111328125, | |
| "logits/rejected": -0.939453125, | |
| "logps/chosen": -506.5, | |
| "logps/rejected": -534.75, | |
| "loss": 0.527, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -1.857421875, | |
| "rewards/margins": 0.568603515625, | |
| "rewards/rejected": -2.427734375, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.26624, | |
| "grad_norm": 9.604421155152727, | |
| "learning_rate": 6.432785104265033e-07, | |
| "logits/chosen": -0.9912109375, | |
| "logits/rejected": -1.02783203125, | |
| "logps/chosen": -475.25, | |
| "logps/rejected": -503.25, | |
| "loss": 0.5645, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.501953125, | |
| "rewards/margins": 0.432373046875, | |
| "rewards/rejected": -1.9345703125, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.26752, | |
| "grad_norm": 14.360891651779323, | |
| "learning_rate": 6.424207339945278e-07, | |
| "logits/chosen": -0.982421875, | |
| "logits/rejected": -1.02734375, | |
| "logps/chosen": -465.75, | |
| "logps/rejected": -502.25, | |
| "loss": 0.5318, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -1.5576171875, | |
| "rewards/margins": 0.544677734375, | |
| "rewards/rejected": -2.1025390625, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.2688, | |
| "grad_norm": 13.445955498385377, | |
| "learning_rate": 6.41557101132518e-07, | |
| "logits/chosen": -0.91162109375, | |
| "logits/rejected": -0.9501953125, | |
| "logps/chosen": -472.5, | |
| "logps/rejected": -518.5, | |
| "loss": 0.5033, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.5302734375, | |
| "rewards/margins": 0.611328125, | |
| "rewards/rejected": -2.142578125, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.27008, | |
| "grad_norm": 12.723099981494066, | |
| "learning_rate": 6.406876291368041e-07, | |
| "logits/chosen": -0.9912109375, | |
| "logits/rejected": -1.0634765625, | |
| "logps/chosen": -497.0, | |
| "logps/rejected": -556.5, | |
| "loss": 0.4932, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.623046875, | |
| "rewards/margins": 0.616455078125, | |
| "rewards/rejected": -2.2373046875, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.27136, | |
| "grad_norm": 9.020187932187099, | |
| "learning_rate": 6.398123354206582e-07, | |
| "logits/chosen": -0.91845703125, | |
| "logits/rejected": -0.98974609375, | |
| "logps/chosen": -500.75, | |
| "logps/rejected": -560.0, | |
| "loss": 0.4941, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -1.8056640625, | |
| "rewards/margins": 0.711669921875, | |
| "rewards/rejected": -2.517578125, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.27264, | |
| "grad_norm": 8.556186928836153, | |
| "learning_rate": 6.389312375139469e-07, | |
| "logits/chosen": -0.9033203125, | |
| "logits/rejected": -0.96142578125, | |
| "logps/chosen": -594.25, | |
| "logps/rejected": -617.5, | |
| "loss": 0.555, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -2.0859375, | |
| "rewards/margins": 0.615234375, | |
| "rewards/rejected": -2.69921875, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.27392, | |
| "grad_norm": 13.39244807934873, | |
| "learning_rate": 6.380443530627797e-07, | |
| "logits/chosen": -0.85986328125, | |
| "logits/rejected": -0.91552734375, | |
| "logps/chosen": -502.25, | |
| "logps/rejected": -560.75, | |
| "loss": 0.4989, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.9384765625, | |
| "rewards/margins": 0.70263671875, | |
| "rewards/rejected": -2.63671875, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.2752, | |
| "grad_norm": 8.777635188116983, | |
| "learning_rate": 6.371516998291552e-07, | |
| "logits/chosen": -0.80126953125, | |
| "logits/rejected": -0.912109375, | |
| "logps/chosen": -518.5, | |
| "logps/rejected": -607.5, | |
| "loss": 0.4933, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -2.1640625, | |
| "rewards/margins": 0.726806640625, | |
| "rewards/rejected": -2.890625, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.27648, | |
| "grad_norm": 16.262313588962954, | |
| "learning_rate": 6.362532956906059e-07, | |
| "logits/chosen": -0.7509765625, | |
| "logits/rejected": -0.8251953125, | |
| "logps/chosen": -543.0, | |
| "logps/rejected": -592.0, | |
| "loss": 0.4902, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.212890625, | |
| "rewards/margins": 0.656982421875, | |
| "rewards/rejected": -2.87109375, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.27776, | |
| "grad_norm": 11.250156251667049, | |
| "learning_rate": 6.353491586398404e-07, | |
| "logits/chosen": -0.720703125, | |
| "logits/rejected": -0.779296875, | |
| "logps/chosen": -568.0, | |
| "logps/rejected": -639.5, | |
| "loss": 0.5265, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -2.626953125, | |
| "rewards/margins": 0.7470703125, | |
| "rewards/rejected": -3.373046875, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.27904, | |
| "grad_norm": 9.749001446309522, | |
| "learning_rate": 6.344393067843826e-07, | |
| "logits/chosen": -0.6494140625, | |
| "logits/rejected": -0.721923828125, | |
| "logps/chosen": -622.25, | |
| "logps/rejected": -703.5, | |
| "loss": 0.5042, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.078125, | |
| "rewards/margins": 0.8087158203125, | |
| "rewards/rejected": -3.888671875, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.28032, | |
| "grad_norm": 13.338732962845118, | |
| "learning_rate": 6.335237583462083e-07, | |
| "logits/chosen": -0.67822265625, | |
| "logits/rejected": -0.67333984375, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -834.5, | |
| "loss": 0.5743, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -3.390625, | |
| "rewards/margins": 1.11883544921875, | |
| "rewards/rejected": -4.517578125, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.2816, | |
| "grad_norm": 23.59756440330084, | |
| "learning_rate": 6.326025316613823e-07, | |
| "logits/chosen": -0.515380859375, | |
| "logits/rejected": -0.543701171875, | |
| "logps/chosen": -666.0, | |
| "logps/rejected": -739.5, | |
| "loss": 0.5342, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -3.46484375, | |
| "rewards/margins": 0.9072265625, | |
| "rewards/rejected": -4.375, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.28288, | |
| "grad_norm": 38.39849197226403, | |
| "learning_rate": 6.316756451796894e-07, | |
| "logits/chosen": -0.611572265625, | |
| "logits/rejected": -0.69287109375, | |
| "logps/chosen": -653.0, | |
| "logps/rejected": -741.5, | |
| "loss": 0.5483, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -3.568359375, | |
| "rewards/margins": 0.7652587890625, | |
| "rewards/rejected": -4.328125, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.28416, | |
| "grad_norm": 15.339038520285099, | |
| "learning_rate": 6.307431174642653e-07, | |
| "logits/chosen": -0.654296875, | |
| "logits/rejected": -0.69580078125, | |
| "logps/chosen": -693.0, | |
| "logps/rejected": -754.5, | |
| "loss": 0.5019, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.5, | |
| "rewards/margins": 0.99560546875, | |
| "rewards/rejected": -4.494140625, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.28544, | |
| "grad_norm": 8.374650607981456, | |
| "learning_rate": 6.298049671912254e-07, | |
| "logits/chosen": -0.57861328125, | |
| "logits/rejected": -0.62353515625, | |
| "logps/chosen": -627.0, | |
| "logps/rejected": -704.5, | |
| "loss": 0.4972, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.31640625, | |
| "rewards/margins": 0.92919921875, | |
| "rewards/rejected": -4.244140625, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.28672, | |
| "grad_norm": 45.963116084804696, | |
| "learning_rate": 6.2886121314929e-07, | |
| "logits/chosen": -0.609619140625, | |
| "logits/rejected": -0.6494140625, | |
| "logps/chosen": -639.5, | |
| "logps/rejected": -666.25, | |
| "loss": 0.7107, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -3.390625, | |
| "rewards/margins": 0.4444580078125, | |
| "rewards/rejected": -3.8359375, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 7.958096573222809, | |
| "learning_rate": 6.279118742394089e-07, | |
| "logits/chosen": -0.588623046875, | |
| "logits/rejected": -0.64306640625, | |
| "logps/chosen": -624.5, | |
| "logps/rejected": -695.5, | |
| "loss": 0.4767, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -2.857421875, | |
| "rewards/margins": 0.83056640625, | |
| "rewards/rejected": -3.689453125, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.28928, | |
| "grad_norm": 9.611100210392358, | |
| "learning_rate": 6.269569694743816e-07, | |
| "logits/chosen": -0.6396484375, | |
| "logits/rejected": -0.68701171875, | |
| "logps/chosen": -564.5, | |
| "logps/rejected": -650.0, | |
| "loss": 0.4786, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -2.638671875, | |
| "rewards/margins": 0.86083984375, | |
| "rewards/rejected": -3.49609375, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.29056, | |
| "grad_norm": 9.097155318325184, | |
| "learning_rate": 6.259965179784779e-07, | |
| "logits/chosen": -0.71484375, | |
| "logits/rejected": -0.76611328125, | |
| "logps/chosen": -612.0, | |
| "logps/rejected": -672.5, | |
| "loss": 0.4834, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -2.845703125, | |
| "rewards/margins": 0.808837890625, | |
| "rewards/rejected": -3.658203125, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.29184, | |
| "grad_norm": 11.493217517064283, | |
| "learning_rate": 6.250305389870541e-07, | |
| "logits/chosen": -0.7099609375, | |
| "logits/rejected": -0.77001953125, | |
| "logps/chosen": -602.5, | |
| "logps/rejected": -667.5, | |
| "loss": 0.5058, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -2.642578125, | |
| "rewards/margins": 0.72119140625, | |
| "rewards/rejected": -3.36328125, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.29312, | |
| "grad_norm": 9.437777289578113, | |
| "learning_rate": 6.240590518461678e-07, | |
| "logits/chosen": -0.62939453125, | |
| "logits/rejected": -0.7001953125, | |
| "logps/chosen": -529.75, | |
| "logps/rejected": -617.0, | |
| "loss": 0.4882, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -2.3828125, | |
| "rewards/margins": 0.7900390625, | |
| "rewards/rejected": -3.171875, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.2944, | |
| "grad_norm": 8.312134657078936, | |
| "learning_rate": 6.230820760121904e-07, | |
| "logits/chosen": -0.578369140625, | |
| "logits/rejected": -0.627197265625, | |
| "logps/chosen": -559.75, | |
| "logps/rejected": -656.5, | |
| "loss": 0.4768, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -2.5625, | |
| "rewards/margins": 0.951416015625, | |
| "rewards/rejected": -3.51171875, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.29568, | |
| "grad_norm": 11.986917072887415, | |
| "learning_rate": 6.220996310514181e-07, | |
| "logits/chosen": -0.5537109375, | |
| "logits/rejected": -0.60888671875, | |
| "logps/chosen": -616.0, | |
| "logps/rejected": -668.5, | |
| "loss": 0.576, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -2.873046875, | |
| "rewards/margins": 0.666015625, | |
| "rewards/rejected": -3.537109375, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.29696, | |
| "grad_norm": 12.172332354528226, | |
| "learning_rate": 6.21111736639679e-07, | |
| "logits/chosen": -0.610595703125, | |
| "logits/rejected": -0.695068359375, | |
| "logps/chosen": -535.0, | |
| "logps/rejected": -651.0, | |
| "loss": 0.4534, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.4560546875, | |
| "rewards/margins": 0.952392578125, | |
| "rewards/rejected": -3.408203125, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.29824, | |
| "grad_norm": 14.200772198441506, | |
| "learning_rate": 6.201184125619403e-07, | |
| "logits/chosen": -0.520263671875, | |
| "logits/rejected": -0.56201171875, | |
| "logps/chosen": -595.0, | |
| "logps/rejected": -654.0, | |
| "loss": 0.4812, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -2.56640625, | |
| "rewards/margins": 0.9599609375, | |
| "rewards/rejected": -3.52734375, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.29952, | |
| "grad_norm": 13.457335334304465, | |
| "learning_rate": 6.191196787119104e-07, | |
| "logits/chosen": -0.5048828125, | |
| "logits/rejected": -0.588134765625, | |
| "logps/chosen": -642.5, | |
| "logps/rejected": -732.5, | |
| "loss": 0.4973, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.150390625, | |
| "rewards/margins": 0.856689453125, | |
| "rewards/rejected": -4.005859375, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.3008, | |
| "grad_norm": 10.321622368203395, | |
| "learning_rate": 6.181155550916422e-07, | |
| "logits/chosen": -0.4512939453125, | |
| "logits/rejected": -0.501708984375, | |
| "logps/chosen": -647.5, | |
| "logps/rejected": -769.5, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.408203125, | |
| "rewards/margins": 0.9677734375, | |
| "rewards/rejected": -4.37109375, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.30208, | |
| "grad_norm": 8.858095075192827, | |
| "learning_rate": 6.171060618111317e-07, | |
| "logits/chosen": -0.44775390625, | |
| "logits/rejected": -0.4814453125, | |
| "logps/chosen": -666.0, | |
| "logps/rejected": -779.0, | |
| "loss": 0.4962, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.6328125, | |
| "rewards/margins": 0.91748046875, | |
| "rewards/rejected": -4.556640625, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.30336, | |
| "grad_norm": 18.70168630039342, | |
| "learning_rate": 6.160912190879145e-07, | |
| "logits/chosen": -0.5400390625, | |
| "logits/rejected": -0.544921875, | |
| "logps/chosen": -651.0, | |
| "logps/rejected": -721.5, | |
| "loss": 0.5413, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.35546875, | |
| "rewards/margins": 0.91790771484375, | |
| "rewards/rejected": -4.275390625, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.30464, | |
| "grad_norm": 9.749493306506885, | |
| "learning_rate": 6.150710472466629e-07, | |
| "logits/chosen": -0.45068359375, | |
| "logits/rejected": -0.485595703125, | |
| "logps/chosen": -638.0, | |
| "logps/rejected": -719.5, | |
| "loss": 0.4751, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.2265625, | |
| "rewards/margins": 0.86181640625, | |
| "rewards/rejected": -4.0859375, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.30592, | |
| "grad_norm": 20.80698350471745, | |
| "learning_rate": 6.140455667187765e-07, | |
| "logits/chosen": -0.46551513671875, | |
| "logits/rejected": -0.50634765625, | |
| "logps/chosen": -761.0, | |
| "logps/rejected": -825.0, | |
| "loss": 0.5784, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.84375, | |
| "rewards/margins": 0.728759765625, | |
| "rewards/rejected": -4.578125, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.3072, | |
| "grad_norm": 8.953639678478353, | |
| "learning_rate": 6.13014798041975e-07, | |
| "logits/chosen": -0.417236328125, | |
| "logits/rejected": -0.4698486328125, | |
| "logps/chosen": -658.0, | |
| "logps/rejected": -751.0, | |
| "loss": 0.5047, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -3.318359375, | |
| "rewards/margins": 0.96484375, | |
| "rewards/rejected": -4.283203125, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.30848, | |
| "grad_norm": 10.845686942793368, | |
| "learning_rate": 6.119787618598854e-07, | |
| "logits/chosen": -0.4853668212890625, | |
| "logits/rejected": -0.5223388671875, | |
| "logps/chosen": -655.75, | |
| "logps/rejected": -701.0, | |
| "loss": 0.5384, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.103515625, | |
| "rewards/margins": 0.777099609375, | |
| "rewards/rejected": -3.880859375, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.30976, | |
| "grad_norm": 9.846249269177688, | |
| "learning_rate": 6.109374789216295e-07, | |
| "logits/chosen": -0.4921875, | |
| "logits/rejected": -0.587646484375, | |
| "logps/chosen": -609.0, | |
| "logps/rejected": -722.0, | |
| "loss": 0.4894, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -2.994140625, | |
| "rewards/margins": 0.99072265625, | |
| "rewards/rejected": -3.9765625, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.31104, | |
| "grad_norm": 8.39816655306362, | |
| "learning_rate": 6.098909700814082e-07, | |
| "logits/chosen": -0.564208984375, | |
| "logits/rejected": -0.618408203125, | |
| "logps/chosen": -498.25, | |
| "logps/rejected": -594.25, | |
| "loss": 0.523, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.3525390625, | |
| "rewards/margins": 0.72216796875, | |
| "rewards/rejected": -3.078125, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.31232, | |
| "grad_norm": 15.315342312273753, | |
| "learning_rate": 6.08839256298083e-07, | |
| "logits/chosen": -0.510986328125, | |
| "logits/rejected": -0.580322265625, | |
| "logps/chosen": -574.75, | |
| "logps/rejected": -668.0, | |
| "loss": 0.4818, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -2.73046875, | |
| "rewards/margins": 0.8583984375, | |
| "rewards/rejected": -3.591796875, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.3136, | |
| "grad_norm": 11.767718051696765, | |
| "learning_rate": 6.077823586347579e-07, | |
| "logits/chosen": -0.48681640625, | |
| "logits/rejected": -0.5322265625, | |
| "logps/chosen": -561.0, | |
| "logps/rejected": -622.0, | |
| "loss": 0.5154, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -2.603515625, | |
| "rewards/margins": 0.7664794921875, | |
| "rewards/rejected": -3.3671875, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.31488, | |
| "grad_norm": 9.13068372680758, | |
| "learning_rate": 6.067202982583559e-07, | |
| "logits/chosen": -0.5447998046875, | |
| "logits/rejected": -0.6015625, | |
| "logps/chosen": -654.5, | |
| "logps/rejected": -729.0, | |
| "loss": 0.5201, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -2.890625, | |
| "rewards/margins": 0.822998046875, | |
| "rewards/rejected": -3.7109375, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.31616, | |
| "grad_norm": 8.828236079236284, | |
| "learning_rate": 6.056530964391961e-07, | |
| "logits/chosen": -0.55419921875, | |
| "logits/rejected": -0.6162109375, | |
| "logps/chosen": -605.75, | |
| "logps/rejected": -679.5, | |
| "loss": 0.4972, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -2.8447265625, | |
| "rewards/margins": 0.84423828125, | |
| "rewards/rejected": -3.6875, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.31744, | |
| "grad_norm": 9.350860751825014, | |
| "learning_rate": 6.04580774550567e-07, | |
| "logits/chosen": -0.501220703125, | |
| "logits/rejected": -0.592529296875, | |
| "logps/chosen": -632.5, | |
| "logps/rejected": -708.0, | |
| "loss": 0.4995, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -2.921875, | |
| "rewards/margins": 0.94384765625, | |
| "rewards/rejected": -3.8671875, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.31872, | |
| "grad_norm": 9.838783187292746, | |
| "learning_rate": 6.035033540682994e-07, | |
| "logits/chosen": -0.5537109375, | |
| "logits/rejected": -0.60009765625, | |
| "logps/chosen": -606.0, | |
| "logps/rejected": -660.5, | |
| "loss": 0.5366, | |
| "rewards/accuracies": 0.6796875, | |
| "rewards/chosen": -2.8203125, | |
| "rewards/margins": 0.7607421875, | |
| "rewards/rejected": -3.580078125, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 10.098149943213269, | |
| "learning_rate": 6.02420856570335e-07, | |
| "logits/chosen": -0.512939453125, | |
| "logits/rejected": -0.553955078125, | |
| "logps/chosen": -584.0, | |
| "logps/rejected": -677.5, | |
| "loss": 0.4851, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -2.765625, | |
| "rewards/margins": 0.787109375, | |
| "rewards/rejected": -3.5546875, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.32128, | |
| "grad_norm": 11.938073471759608, | |
| "learning_rate": 6.013333037362958e-07, | |
| "logits/chosen": -0.53125, | |
| "logits/rejected": -0.58447265625, | |
| "logps/chosen": -637.0, | |
| "logps/rejected": -707.0, | |
| "loss": 0.5481, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -2.98046875, | |
| "rewards/margins": 0.731689453125, | |
| "rewards/rejected": -3.71484375, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.32256, | |
| "grad_norm": 8.629915885629975, | |
| "learning_rate": 6.002407173470485e-07, | |
| "logits/chosen": -0.52685546875, | |
| "logits/rejected": -0.5693359375, | |
| "logps/chosen": -647.0, | |
| "logps/rejected": -739.5, | |
| "loss": 0.4979, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.0390625, | |
| "rewards/margins": 0.815185546875, | |
| "rewards/rejected": -3.85546875, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.32384, | |
| "grad_norm": 15.164274031135337, | |
| "learning_rate": 5.991431192842692e-07, | |
| "logits/chosen": -0.47216796875, | |
| "logits/rejected": -0.52001953125, | |
| "logps/chosen": -640.5, | |
| "logps/rejected": -726.0, | |
| "loss": 0.455, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.05859375, | |
| "rewards/margins": 0.89501953125, | |
| "rewards/rejected": -3.951171875, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.32512, | |
| "grad_norm": 13.467159822471947, | |
| "learning_rate": 5.980405315300045e-07, | |
| "logits/chosen": -0.395751953125, | |
| "logits/rejected": -0.4453125, | |
| "logps/chosen": -634.5, | |
| "logps/rejected": -718.0, | |
| "loss": 0.5203, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -3.119140625, | |
| "rewards/margins": 0.7841796875, | |
| "rewards/rejected": -3.90625, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3264, | |
| "grad_norm": 9.015056279995315, | |
| "learning_rate": 5.969329761662318e-07, | |
| "logits/chosen": -0.38385009765625, | |
| "logits/rejected": -0.4581298828125, | |
| "logps/chosen": -625.0, | |
| "logps/rejected": -722.5, | |
| "loss": 0.5035, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.091796875, | |
| "rewards/margins": 0.9658203125, | |
| "rewards/rejected": -4.05859375, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.32768, | |
| "grad_norm": 9.520970646405912, | |
| "learning_rate": 5.958204753744171e-07, | |
| "logits/chosen": -0.4287109375, | |
| "logits/rejected": -0.486572265625, | |
| "logps/chosen": -615.5, | |
| "logps/rejected": -700.0, | |
| "loss": 0.5045, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.001953125, | |
| "rewards/margins": 0.9034423828125, | |
| "rewards/rejected": -3.90625, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.32896, | |
| "grad_norm": 22.42105812552605, | |
| "learning_rate": 5.9470305143507e-07, | |
| "logits/chosen": -0.405029296875, | |
| "logits/rejected": -0.456787109375, | |
| "logps/chosen": -684.0, | |
| "logps/rejected": -746.0, | |
| "loss": 0.6086, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.5625, | |
| "rewards/margins": 0.7578125, | |
| "rewards/rejected": -4.3203125, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.33024, | |
| "grad_norm": 20.00218984655251, | |
| "learning_rate": 5.935807267272985e-07, | |
| "logits/chosen": -0.4755859375, | |
| "logits/rejected": -0.497802734375, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -725.5, | |
| "loss": 0.5487, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.1875, | |
| "rewards/margins": 0.792236328125, | |
| "rewards/rejected": -3.978515625, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.33152, | |
| "grad_norm": 10.403435140293421, | |
| "learning_rate": 5.924535237283598e-07, | |
| "logits/chosen": -0.47998046875, | |
| "logits/rejected": -0.528076171875, | |
| "logps/chosen": -617.5, | |
| "logps/rejected": -688.0, | |
| "loss": 0.4796, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.8671875, | |
| "rewards/margins": 0.8173828125, | |
| "rewards/rejected": -3.68359375, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.3328, | |
| "grad_norm": 8.359013118624633, | |
| "learning_rate": 5.913214650132112e-07, | |
| "logits/chosen": -0.43115234375, | |
| "logits/rejected": -0.514892578125, | |
| "logps/chosen": -580.75, | |
| "logps/rejected": -687.0, | |
| "loss": 0.4962, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -2.859375, | |
| "rewards/margins": 0.9130859375, | |
| "rewards/rejected": -3.76953125, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.33408, | |
| "grad_norm": 9.989053690781105, | |
| "learning_rate": 5.901845732540568e-07, | |
| "logits/chosen": -0.488037109375, | |
| "logits/rejected": -0.51708984375, | |
| "logps/chosen": -659.0, | |
| "logps/rejected": -727.0, | |
| "loss": 0.4965, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.03125, | |
| "rewards/margins": 1.017578125, | |
| "rewards/rejected": -4.044921875, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.33536, | |
| "grad_norm": 9.229777464931137, | |
| "learning_rate": 5.890428712198945e-07, | |
| "logits/chosen": -0.4755859375, | |
| "logits/rejected": -0.4970703125, | |
| "logps/chosen": -673.5, | |
| "logps/rejected": -766.0, | |
| "loss": 0.4583, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.19921875, | |
| "rewards/margins": 1.03955078125, | |
| "rewards/rejected": -4.2421875, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.33664, | |
| "grad_norm": 11.937311630757574, | |
| "learning_rate": 5.878963817760597e-07, | |
| "logits/chosen": -0.454833984375, | |
| "logits/rejected": -0.519775390625, | |
| "logps/chosen": -642.5, | |
| "logps/rejected": -735.5, | |
| "loss": 0.5139, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.205078125, | |
| "rewards/margins": 0.8829345703125, | |
| "rewards/rejected": -4.0859375, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.33792, | |
| "grad_norm": 10.230639726258993, | |
| "learning_rate": 5.867451278837666e-07, | |
| "logits/chosen": -0.355499267578125, | |
| "logits/rejected": -0.408599853515625, | |
| "logps/chosen": -642.5, | |
| "logps/rejected": -704.0, | |
| "loss": 0.5486, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -3.228515625, | |
| "rewards/margins": 0.77197265625, | |
| "rewards/rejected": -3.998046875, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3392, | |
| "grad_norm": 13.416289127212499, | |
| "learning_rate": 5.855891325996495e-07, | |
| "logits/chosen": -0.41424560546875, | |
| "logits/rejected": -0.42974853515625, | |
| "logps/chosen": -658.5, | |
| "logps/rejected": -730.0, | |
| "loss": 0.5526, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -3.15625, | |
| "rewards/margins": 0.790771484375, | |
| "rewards/rejected": -3.943359375, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.34048, | |
| "grad_norm": 12.108152563268916, | |
| "learning_rate": 5.844284190753003e-07, | |
| "logits/chosen": -0.4765625, | |
| "logits/rejected": -0.50439453125, | |
| "logps/chosen": -636.5, | |
| "logps/rejected": -707.0, | |
| "loss": 0.5176, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -2.982421875, | |
| "rewards/margins": 0.932373046875, | |
| "rewards/rejected": -3.916015625, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.34176, | |
| "grad_norm": 9.58986611812255, | |
| "learning_rate": 5.83263010556805e-07, | |
| "logits/chosen": -0.508544921875, | |
| "logits/rejected": -0.589599609375, | |
| "logps/chosen": -634.0, | |
| "logps/rejected": -738.5, | |
| "loss": 0.5, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -3.123046875, | |
| "rewards/margins": 0.942626953125, | |
| "rewards/rejected": -4.068359375, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.34304, | |
| "grad_norm": 10.292439732995158, | |
| "learning_rate": 5.820929303842783e-07, | |
| "logits/chosen": -0.5439453125, | |
| "logits/rejected": -0.611572265625, | |
| "logps/chosen": -568.25, | |
| "logps/rejected": -657.5, | |
| "loss": 0.4522, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -2.576171875, | |
| "rewards/margins": 1.00390625, | |
| "rewards/rejected": -3.58203125, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.34432, | |
| "grad_norm": 8.040921361038157, | |
| "learning_rate": 5.809182019913959e-07, | |
| "logits/chosen": -0.57275390625, | |
| "logits/rejected": -0.607666015625, | |
| "logps/chosen": -569.75, | |
| "logps/rejected": -623.0, | |
| "loss": 0.5395, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -2.421875, | |
| "rewards/margins": 0.765380859375, | |
| "rewards/rejected": -3.1875, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.3456, | |
| "grad_norm": 12.336782262534681, | |
| "learning_rate": 5.797388489049254e-07, | |
| "logits/chosen": -0.59423828125, | |
| "logits/rejected": -0.62109375, | |
| "logps/chosen": -616.0, | |
| "logps/rejected": -659.5, | |
| "loss": 0.5252, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.69140625, | |
| "rewards/margins": 0.767822265625, | |
| "rewards/rejected": -3.45703125, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.34688, | |
| "grad_norm": 14.10037159945176, | |
| "learning_rate": 5.785548947442547e-07, | |
| "logits/chosen": -0.578125, | |
| "logits/rejected": -0.60888671875, | |
| "logps/chosen": -575.5, | |
| "logps/rejected": -677.5, | |
| "loss": 0.444, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.5703125, | |
| "rewards/margins": 0.984375, | |
| "rewards/rejected": -3.55859375, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.34816, | |
| "grad_norm": 10.631299174291128, | |
| "learning_rate": 5.773663632209201e-07, | |
| "logits/chosen": -0.559814453125, | |
| "logits/rejected": -0.641357421875, | |
| "logps/chosen": -580.5, | |
| "logps/rejected": -681.5, | |
| "loss": 0.4758, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -2.64453125, | |
| "rewards/margins": 0.9033203125, | |
| "rewards/rejected": -3.546875, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.34944, | |
| "grad_norm": 9.172576313947784, | |
| "learning_rate": 5.7617327813813e-07, | |
| "logits/chosen": -0.492431640625, | |
| "logits/rejected": -0.562744140625, | |
| "logps/chosen": -618.0, | |
| "logps/rejected": -672.5, | |
| "loss": 0.5197, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.921875, | |
| "rewards/margins": 0.7423095703125, | |
| "rewards/rejected": -3.666015625, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.35072, | |
| "grad_norm": 10.958422585425877, | |
| "learning_rate": 5.749756633902887e-07, | |
| "logits/chosen": -0.531494140625, | |
| "logits/rejected": -0.56201171875, | |
| "logps/chosen": -585.75, | |
| "logps/rejected": -664.5, | |
| "loss": 0.4871, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -2.65625, | |
| "rewards/margins": 0.888671875, | |
| "rewards/rejected": -3.548828125, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 11.430707520592867, | |
| "learning_rate": 5.737735429625186e-07, | |
| "logits/chosen": -0.546142578125, | |
| "logits/rejected": -0.597900390625, | |
| "logps/chosen": -664.0, | |
| "logps/rejected": -724.5, | |
| "loss": 0.5227, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.134765625, | |
| "rewards/margins": 0.956787109375, | |
| "rewards/rejected": -4.08984375, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.35328, | |
| "grad_norm": 16.483350734581908, | |
| "learning_rate": 5.725669409301782e-07, | |
| "logits/chosen": -0.44775390625, | |
| "logits/rejected": -0.451416015625, | |
| "logps/chosen": -642.0, | |
| "logps/rejected": -748.5, | |
| "loss": 0.5278, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -3.234375, | |
| "rewards/margins": 0.9853515625, | |
| "rewards/rejected": -4.224609375, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.35456, | |
| "grad_norm": 9.95398312986462, | |
| "learning_rate": 5.71355881458382e-07, | |
| "logits/chosen": -0.474609375, | |
| "logits/rejected": -0.506591796875, | |
| "logps/chosen": -635.0, | |
| "logps/rejected": -716.5, | |
| "loss": 0.4857, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.126953125, | |
| "rewards/margins": 0.912109375, | |
| "rewards/rejected": -4.033203125, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.35584, | |
| "grad_norm": 18.865132248923636, | |
| "learning_rate": 5.701403888015149e-07, | |
| "logits/chosen": -0.446044921875, | |
| "logits/rejected": -0.47314453125, | |
| "logps/chosen": -639.25, | |
| "logps/rejected": -730.0, | |
| "loss": 0.4663, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.19921875, | |
| "rewards/margins": 0.96240234375, | |
| "rewards/rejected": -4.16796875, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.35712, | |
| "grad_norm": 9.213729341091112, | |
| "learning_rate": 5.689204873027471e-07, | |
| "logits/chosen": -0.38330078125, | |
| "logits/rejected": -0.398681640625, | |
| "logps/chosen": -657.0, | |
| "logps/rejected": -725.0, | |
| "loss": 0.502, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.232421875, | |
| "rewards/margins": 0.822998046875, | |
| "rewards/rejected": -4.0546875, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.3584, | |
| "grad_norm": 19.70630286015966, | |
| "learning_rate": 5.676962013935464e-07, | |
| "logits/chosen": -0.4217529296875, | |
| "logits/rejected": -0.4654541015625, | |
| "logps/chosen": -679.5, | |
| "logps/rejected": -734.5, | |
| "loss": 0.5831, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -3.66015625, | |
| "rewards/margins": 0.718505859375, | |
| "rewards/rejected": -4.37890625, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.35968, | |
| "grad_norm": 22.464253350390536, | |
| "learning_rate": 5.664675555931892e-07, | |
| "logits/chosen": -0.401611328125, | |
| "logits/rejected": -0.404296875, | |
| "logps/chosen": -713.0, | |
| "logps/rejected": -825.0, | |
| "loss": 0.572, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -4.025390625, | |
| "rewards/margins": 0.773193359375, | |
| "rewards/rejected": -4.796875, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.36096, | |
| "grad_norm": 10.334427726270604, | |
| "learning_rate": 5.652345745082692e-07, | |
| "logits/chosen": -0.4014892578125, | |
| "logits/rejected": -0.439208984375, | |
| "logps/chosen": -702.0, | |
| "logps/rejected": -764.0, | |
| "loss": 0.4943, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.63671875, | |
| "rewards/margins": 0.854736328125, | |
| "rewards/rejected": -4.494140625, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.36224, | |
| "grad_norm": 8.836351837837654, | |
| "learning_rate": 5.639972828322043e-07, | |
| "logits/chosen": -0.3765869140625, | |
| "logits/rejected": -0.436767578125, | |
| "logps/chosen": -676.5, | |
| "logps/rejected": -752.0, | |
| "loss": 0.4468, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -3.45703125, | |
| "rewards/margins": 1.09326171875, | |
| "rewards/rejected": -4.55078125, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.36352, | |
| "grad_norm": 36.01906152382893, | |
| "learning_rate": 5.627557053447426e-07, | |
| "logits/chosen": -0.3876953125, | |
| "logits/rejected": -0.413818359375, | |
| "logps/chosen": -689.0, | |
| "logps/rejected": -736.5, | |
| "loss": 0.6353, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.5625, | |
| "rewards/margins": 0.6876678466796875, | |
| "rewards/rejected": -4.24609375, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3648, | |
| "grad_norm": 10.074359287488177, | |
| "learning_rate": 5.615098669114664e-07, | |
| "logits/chosen": -0.3740234375, | |
| "logits/rejected": -0.421630859375, | |
| "logps/chosen": -666.0, | |
| "logps/rejected": -800.5, | |
| "loss": 0.4379, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.51953125, | |
| "rewards/margins": 1.00341796875, | |
| "rewards/rejected": -4.52734375, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.36608, | |
| "grad_norm": 8.877186370667038, | |
| "learning_rate": 5.602597924832926e-07, | |
| "logits/chosen": -0.508056640625, | |
| "logits/rejected": -0.53857421875, | |
| "logps/chosen": -638.0, | |
| "logps/rejected": -740.5, | |
| "loss": 0.4867, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.2109375, | |
| "rewards/margins": 1.00146484375, | |
| "rewards/rejected": -4.212890625, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.36736, | |
| "grad_norm": 9.967314448363597, | |
| "learning_rate": 5.590055070959751e-07, | |
| "logits/chosen": -0.4375, | |
| "logits/rejected": -0.4688720703125, | |
| "logps/chosen": -672.0, | |
| "logps/rejected": -734.0, | |
| "loss": 0.4381, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.158203125, | |
| "rewards/margins": 1.140625, | |
| "rewards/rejected": -4.298828125, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.36864, | |
| "grad_norm": 33.10004352946362, | |
| "learning_rate": 5.577470358696021e-07, | |
| "logits/chosen": -0.389404296875, | |
| "logits/rejected": -0.446044921875, | |
| "logps/chosen": -646.5, | |
| "logps/rejected": -739.5, | |
| "loss": 0.6066, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -3.439453125, | |
| "rewards/margins": 0.7655029296875, | |
| "rewards/rejected": -4.203125, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.36992, | |
| "grad_norm": 10.915408242599547, | |
| "learning_rate": 5.56484404008093e-07, | |
| "logits/chosen": -0.506103515625, | |
| "logits/rejected": -0.5328369140625, | |
| "logps/chosen": -618.5, | |
| "logps/rejected": -691.5, | |
| "loss": 0.4948, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.96484375, | |
| "rewards/margins": 1.02490234375, | |
| "rewards/rejected": -3.990234375, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.3712, | |
| "grad_norm": 10.911477941525613, | |
| "learning_rate": 5.552176367986944e-07, | |
| "logits/chosen": -0.579345703125, | |
| "logits/rejected": -0.63330078125, | |
| "logps/chosen": -630.5, | |
| "logps/rejected": -765.5, | |
| "loss": 0.501, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.904296875, | |
| "rewards/margins": 0.831298828125, | |
| "rewards/rejected": -3.734375, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.37248, | |
| "grad_norm": 13.643638150351943, | |
| "learning_rate": 5.539467596114729e-07, | |
| "logits/chosen": -0.488525390625, | |
| "logits/rejected": -0.54443359375, | |
| "logps/chosen": -586.5, | |
| "logps/rejected": -697.5, | |
| "loss": 0.4345, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -2.744140625, | |
| "rewards/margins": 1.025390625, | |
| "rewards/rejected": -3.76953125, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.37376, | |
| "grad_norm": 10.440042665143498, | |
| "learning_rate": 5.526717978988076e-07, | |
| "logits/chosen": -0.615234375, | |
| "logits/rejected": -0.625, | |
| "logps/chosen": -635.0, | |
| "logps/rejected": -655.0, | |
| "loss": 0.5673, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -2.779296875, | |
| "rewards/margins": 0.68798828125, | |
| "rewards/rejected": -3.470703125, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.37504, | |
| "grad_norm": 13.718400637119657, | |
| "learning_rate": 5.513927771948797e-07, | |
| "logits/chosen": -0.5810546875, | |
| "logits/rejected": -0.61767578125, | |
| "logps/chosen": -562.5, | |
| "logps/rejected": -622.25, | |
| "loss": 0.5058, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -2.4091796875, | |
| "rewards/margins": 0.7276611328125, | |
| "rewards/rejected": -3.138671875, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.37632, | |
| "grad_norm": 12.273740462313567, | |
| "learning_rate": 5.501097231151619e-07, | |
| "logits/chosen": -0.6591796875, | |
| "logits/rejected": -0.68896484375, | |
| "logps/chosen": -548.25, | |
| "logps/rejected": -623.0, | |
| "loss": 0.4711, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -2.212890625, | |
| "rewards/margins": 0.90234375, | |
| "rewards/rejected": -3.115234375, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3776, | |
| "grad_norm": 12.632600487373818, | |
| "learning_rate": 5.488226613559045e-07, | |
| "logits/chosen": -0.608154296875, | |
| "logits/rejected": -0.649169921875, | |
| "logps/chosen": -562.75, | |
| "logps/rejected": -593.75, | |
| "loss": 0.4987, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -2.2451171875, | |
| "rewards/margins": 0.77099609375, | |
| "rewards/rejected": -3.017578125, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.37888, | |
| "grad_norm": 8.796089070402305, | |
| "learning_rate": 5.475316176936217e-07, | |
| "logits/chosen": -0.593505859375, | |
| "logits/rejected": -0.647705078125, | |
| "logps/chosen": -598.5, | |
| "logps/rejected": -641.5, | |
| "loss": 0.5126, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -2.521484375, | |
| "rewards/margins": 0.88037109375, | |
| "rewards/rejected": -3.40234375, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.38016, | |
| "grad_norm": 8.538475094142157, | |
| "learning_rate": 5.462366179845746e-07, | |
| "logits/chosen": -0.6328125, | |
| "logits/rejected": -0.70556640625, | |
| "logps/chosen": -560.25, | |
| "logps/rejected": -608.5, | |
| "loss": 0.551, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -2.3828125, | |
| "rewards/margins": 0.629150390625, | |
| "rewards/rejected": -3.009765625, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.38144, | |
| "grad_norm": 8.516105169564174, | |
| "learning_rate": 5.449376881642538e-07, | |
| "logits/chosen": -0.524658203125, | |
| "logits/rejected": -0.607666015625, | |
| "logps/chosen": -572.75, | |
| "logps/rejected": -645.0, | |
| "loss": 0.5534, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -2.763671875, | |
| "rewards/margins": 0.6611328125, | |
| "rewards/rejected": -3.423828125, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.38272, | |
| "grad_norm": 8.520830137173526, | |
| "learning_rate": 5.436348542468598e-07, | |
| "logits/chosen": -0.6298828125, | |
| "logits/rejected": -0.643310546875, | |
| "logps/chosen": -637.0, | |
| "logps/rejected": -700.5, | |
| "loss": 0.5124, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -2.669921875, | |
| "rewards/margins": 0.77044677734375, | |
| "rewards/rejected": -3.4375, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 11.383934405773742, | |
| "learning_rate": 5.423281423247821e-07, | |
| "logits/chosen": -0.62939453125, | |
| "logits/rejected": -0.669921875, | |
| "logps/chosen": -626.5, | |
| "logps/rejected": -694.0, | |
| "loss": 0.4308, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -2.6640625, | |
| "rewards/margins": 0.98193359375, | |
| "rewards/rejected": -3.646484375, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "eval_logits/chosen": -0.5311279296875, | |
| "eval_logits/rejected": -0.6102294921875, | |
| "eval_logps/chosen": -589.25, | |
| "eval_logps/rejected": -647.5, | |
| "eval_loss": 0.5219140648841858, | |
| "eval_rewards/accuracies": 0.7308593988418579, | |
| "eval_rewards/chosen": -2.68359375, | |
| "eval_rewards/margins": 0.7745361328125, | |
| "eval_rewards/rejected": -3.458984375, | |
| "eval_runtime": 27.4735, | |
| "eval_samples_per_second": 18.199, | |
| "eval_steps_per_second": 0.582, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.38528, | |
| "grad_norm": 9.685684454302386, | |
| "learning_rate": 5.410175785680765e-07, | |
| "logits/chosen": -0.60009765625, | |
| "logits/rejected": -0.627197265625, | |
| "logps/chosen": -610.25, | |
| "logps/rejected": -658.5, | |
| "loss": 0.5635, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -2.712890625, | |
| "rewards/margins": 0.729248046875, | |
| "rewards/rejected": -3.44140625, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.38656, | |
| "grad_norm": 15.00512977681773, | |
| "learning_rate": 5.397031892239415e-07, | |
| "logits/chosen": -0.61962890625, | |
| "logits/rejected": -0.6162109375, | |
| "logps/chosen": -609.5, | |
| "logps/rejected": -658.0, | |
| "loss": 0.4694, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.783203125, | |
| "rewards/margins": 0.82666015625, | |
| "rewards/rejected": -3.61328125, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.38784, | |
| "grad_norm": 13.021657646201135, | |
| "learning_rate": 5.383850006161913e-07, | |
| "logits/chosen": -0.453125, | |
| "logits/rejected": -0.511962890625, | |
| "logps/chosen": -610.0, | |
| "logps/rejected": -676.0, | |
| "loss": 0.5271, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -2.849609375, | |
| "rewards/margins": 0.75146484375, | |
| "rewards/rejected": -3.60546875, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.38912, | |
| "grad_norm": 13.16735841607135, | |
| "learning_rate": 5.370630391447304e-07, | |
| "logits/chosen": -0.53125, | |
| "logits/rejected": -0.5673828125, | |
| "logps/chosen": -589.25, | |
| "logps/rejected": -693.0, | |
| "loss": 0.4679, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -2.7890625, | |
| "rewards/margins": 0.91796875, | |
| "rewards/rejected": -3.712890625, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3904, | |
| "grad_norm": 8.650434954949796, | |
| "learning_rate": 5.357373312850235e-07, | |
| "logits/chosen": -0.49755859375, | |
| "logits/rejected": -0.5546875, | |
| "logps/chosen": -628.5, | |
| "logps/rejected": -716.0, | |
| "loss": 0.5205, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.193359375, | |
| "rewards/margins": 0.80926513671875, | |
| "rewards/rejected": -4.001953125, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.39168, | |
| "grad_norm": 12.03283706604588, | |
| "learning_rate": 5.344079035875661e-07, | |
| "logits/chosen": -0.468505859375, | |
| "logits/rejected": -0.535400390625, | |
| "logps/chosen": -658.0, | |
| "logps/rejected": -745.0, | |
| "loss": 0.5055, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.0546875, | |
| "rewards/margins": 0.9290771484375, | |
| "rewards/rejected": -3.98828125, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.39296, | |
| "grad_norm": 21.421330253934375, | |
| "learning_rate": 5.330747826773522e-07, | |
| "logits/chosen": -0.4796142578125, | |
| "logits/rejected": -0.5126953125, | |
| "logps/chosen": -613.5, | |
| "logps/rejected": -671.0, | |
| "loss": 0.5362, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.009765625, | |
| "rewards/margins": 0.86328125, | |
| "rewards/rejected": -3.873046875, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.39424, | |
| "grad_norm": 11.41199587027057, | |
| "learning_rate": 5.317379952533411e-07, | |
| "logits/chosen": -0.56982421875, | |
| "logits/rejected": -0.62548828125, | |
| "logps/chosen": -628.0, | |
| "logps/rejected": -727.5, | |
| "loss": 0.4724, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -2.916015625, | |
| "rewards/margins": 1.119140625, | |
| "rewards/rejected": -4.033203125, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.39552, | |
| "grad_norm": 13.469179182772214, | |
| "learning_rate": 5.303975680879232e-07, | |
| "logits/chosen": -0.52392578125, | |
| "logits/rejected": -0.54833984375, | |
| "logps/chosen": -615.5, | |
| "logps/rejected": -669.0, | |
| "loss": 0.5421, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -2.951171875, | |
| "rewards/margins": 0.742431640625, | |
| "rewards/rejected": -3.6953125, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.3968, | |
| "grad_norm": 16.733814154590878, | |
| "learning_rate": 5.290535280263835e-07, | |
| "logits/chosen": -0.39404296875, | |
| "logits/rejected": -0.3988037109375, | |
| "logps/chosen": -630.0, | |
| "logps/rejected": -687.5, | |
| "loss": 0.4677, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -2.814453125, | |
| "rewards/margins": 0.91748046875, | |
| "rewards/rejected": -3.740234375, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.39808, | |
| "grad_norm": 8.75301225998322, | |
| "learning_rate": 5.277059019863637e-07, | |
| "logits/chosen": -0.43408203125, | |
| "logits/rejected": -0.5146484375, | |
| "logps/chosen": -606.0, | |
| "logps/rejected": -685.25, | |
| "loss": 0.5019, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.65625, | |
| "rewards/margins": 0.901123046875, | |
| "rewards/rejected": -3.560546875, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.39936, | |
| "grad_norm": 8.024297545260174, | |
| "learning_rate": 5.263547169573235e-07, | |
| "logits/chosen": -0.55029296875, | |
| "logits/rejected": -0.591064453125, | |
| "logps/chosen": -567.75, | |
| "logps/rejected": -655.0, | |
| "loss": 0.5004, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.51953125, | |
| "rewards/margins": 0.76953125, | |
| "rewards/rejected": -3.2890625, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.40064, | |
| "grad_norm": 9.38829913769337, | |
| "learning_rate": 5.25e-07, | |
| "logits/chosen": -0.4615478515625, | |
| "logits/rejected": -0.5108642578125, | |
| "logps/chosen": -597.75, | |
| "logps/rejected": -676.0, | |
| "loss": 0.4893, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -2.59375, | |
| "rewards/margins": 0.8583984375, | |
| "rewards/rejected": -3.455078125, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.40192, | |
| "grad_norm": 8.678985799279634, | |
| "learning_rate": 5.236417782458656e-07, | |
| "logits/chosen": -0.572021484375, | |
| "logits/rejected": -0.590576171875, | |
| "logps/chosen": -636.5, | |
| "logps/rejected": -693.5, | |
| "loss": 0.5174, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -2.84375, | |
| "rewards/margins": 0.8515625, | |
| "rewards/rejected": -3.6953125, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4032, | |
| "grad_norm": 12.718593158199928, | |
| "learning_rate": 5.222800788965847e-07, | |
| "logits/chosen": -0.526611328125, | |
| "logits/rejected": -0.5595703125, | |
| "logps/chosen": -596.0, | |
| "logps/rejected": -672.5, | |
| "loss": 0.4431, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -2.623046875, | |
| "rewards/margins": 0.919677734375, | |
| "rewards/rejected": -3.546875, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.40448, | |
| "grad_norm": 7.7191674664296075, | |
| "learning_rate": 5.209149292234689e-07, | |
| "logits/chosen": -0.491455078125, | |
| "logits/rejected": -0.586669921875, | |
| "logps/chosen": -589.0, | |
| "logps/rejected": -668.0, | |
| "loss": 0.4413, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -2.6796875, | |
| "rewards/margins": 0.98486328125, | |
| "rewards/rejected": -3.662109375, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.40576, | |
| "grad_norm": 8.070223960301346, | |
| "learning_rate": 5.195463565669309e-07, | |
| "logits/chosen": -0.482421875, | |
| "logits/rejected": -0.54638671875, | |
| "logps/chosen": -555.5, | |
| "logps/rejected": -614.5, | |
| "loss": 0.5058, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -2.587890625, | |
| "rewards/margins": 0.83489990234375, | |
| "rewards/rejected": -3.419921875, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.40704, | |
| "grad_norm": 10.840771905557805, | |
| "learning_rate": 5.18174388335937e-07, | |
| "logits/chosen": -0.4683837890625, | |
| "logits/rejected": -0.53369140625, | |
| "logps/chosen": -602.5, | |
| "logps/rejected": -684.5, | |
| "loss": 0.4845, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.919921875, | |
| "rewards/margins": 0.88427734375, | |
| "rewards/rejected": -3.798828125, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.40832, | |
| "grad_norm": 10.638532746515596, | |
| "learning_rate": 5.167990520074577e-07, | |
| "logits/chosen": -0.458251953125, | |
| "logits/rejected": -0.510498046875, | |
| "logps/chosen": -632.5, | |
| "logps/rejected": -718.5, | |
| "loss": 0.4626, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.06640625, | |
| "rewards/margins": 1.0732421875, | |
| "rewards/rejected": -4.142578125, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.4096, | |
| "grad_norm": 19.063401240095594, | |
| "learning_rate": 5.154203751259183e-07, | |
| "logits/chosen": -0.400634765625, | |
| "logits/rejected": -0.4285888671875, | |
| "logps/chosen": -671.5, | |
| "logps/rejected": -744.5, | |
| "loss": 0.53, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -3.439453125, | |
| "rewards/margins": 1.008056640625, | |
| "rewards/rejected": -4.447265625, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.41088, | |
| "grad_norm": 11.58200685748918, | |
| "learning_rate": 5.140383853026462e-07, | |
| "logits/chosen": -0.2918701171875, | |
| "logits/rejected": -0.3492431640625, | |
| "logps/chosen": -709.5, | |
| "logps/rejected": -846.0, | |
| "loss": 0.4238, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.49609375, | |
| "rewards/margins": 1.197265625, | |
| "rewards/rejected": -4.6953125, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.41216, | |
| "grad_norm": 10.060507343077607, | |
| "learning_rate": 5.12653110215319e-07, | |
| "logits/chosen": -0.3533172607421875, | |
| "logits/rejected": -0.39697265625, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -751.0, | |
| "loss": 0.4708, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.43359375, | |
| "rewards/margins": 0.975830078125, | |
| "rewards/rejected": -4.40625, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.41344, | |
| "grad_norm": 20.29118301920304, | |
| "learning_rate": 5.112645776074089e-07, | |
| "logits/chosen": -0.33740234375, | |
| "logits/rejected": -0.3800048828125, | |
| "logps/chosen": -679.5, | |
| "logps/rejected": -731.0, | |
| "loss": 0.529, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.548828125, | |
| "rewards/margins": 0.9560546875, | |
| "rewards/rejected": -4.50390625, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.41472, | |
| "grad_norm": 10.344847321745984, | |
| "learning_rate": 5.098728152876287e-07, | |
| "logits/chosen": -0.3602294921875, | |
| "logits/rejected": -0.4044189453125, | |
| "logps/chosen": -676.5, | |
| "logps/rejected": -783.5, | |
| "loss": 0.4689, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.681640625, | |
| "rewards/margins": 1.164306640625, | |
| "rewards/rejected": -4.84765625, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 53.68636513984977, | |
| "learning_rate": 5.084778511293732e-07, | |
| "logits/chosen": -0.37158203125, | |
| "logits/rejected": -0.422607421875, | |
| "logps/chosen": -695.0, | |
| "logps/rejected": -795.5, | |
| "loss": 0.5701, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.08203125, | |
| "rewards/margins": 0.8125, | |
| "rewards/rejected": -4.8984375, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.41728, | |
| "grad_norm": 8.17017463161669, | |
| "learning_rate": 5.070797130701617e-07, | |
| "logits/chosen": -0.4307861328125, | |
| "logits/rejected": -0.479736328125, | |
| "logps/chosen": -671.0, | |
| "logps/rejected": -785.5, | |
| "loss": 0.45, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.505859375, | |
| "rewards/margins": 1.221435546875, | |
| "rewards/rejected": -4.73046875, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.41856, | |
| "grad_norm": 21.790110998965517, | |
| "learning_rate": 5.056784291110795e-07, | |
| "logits/chosen": -0.364990234375, | |
| "logits/rejected": -0.401123046875, | |
| "logps/chosen": -700.5, | |
| "logps/rejected": -780.0, | |
| "loss": 0.5295, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.9375, | |
| "rewards/margins": 0.91845703125, | |
| "rewards/rejected": -4.85546875, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.41984, | |
| "grad_norm": 12.801819009552196, | |
| "learning_rate": 5.04274027316215e-07, | |
| "logits/chosen": -0.3697509765625, | |
| "logits/rejected": -0.385498046875, | |
| "logps/chosen": -688.5, | |
| "logps/rejected": -746.5, | |
| "loss": 0.573, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.572265625, | |
| "rewards/margins": 0.7158203125, | |
| "rewards/rejected": -4.29296875, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.42112, | |
| "grad_norm": 9.406211305601078, | |
| "learning_rate": 5.028665358120994e-07, | |
| "logits/chosen": -0.3780517578125, | |
| "logits/rejected": -0.391845703125, | |
| "logps/chosen": -657.0, | |
| "logps/rejected": -727.0, | |
| "loss": 0.4427, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.337890625, | |
| "rewards/margins": 0.92919921875, | |
| "rewards/rejected": -4.259765625, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.4224, | |
| "grad_norm": 10.033069082942374, | |
| "learning_rate": 5.014559827871426e-07, | |
| "logits/chosen": -0.425048828125, | |
| "logits/rejected": -0.448486328125, | |
| "logps/chosen": -656.0, | |
| "logps/rejected": -715.0, | |
| "loss": 0.5266, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.21484375, | |
| "rewards/margins": 0.91552734375, | |
| "rewards/rejected": -4.12890625, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.42368, | |
| "grad_norm": 9.652663655572574, | |
| "learning_rate": 5.00042396491069e-07, | |
| "logits/chosen": -0.506103515625, | |
| "logits/rejected": -0.539794921875, | |
| "logps/chosen": -658.0, | |
| "logps/rejected": -751.0, | |
| "loss": 0.4558, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.150390625, | |
| "rewards/margins": 1.1103515625, | |
| "rewards/rejected": -4.26171875, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.42496, | |
| "grad_norm": 12.260458134069255, | |
| "learning_rate": 4.986258052343511e-07, | |
| "logits/chosen": -0.4840087890625, | |
| "logits/rejected": -0.509521484375, | |
| "logps/chosen": -655.5, | |
| "logps/rejected": -725.0, | |
| "loss": 0.5506, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -3.10546875, | |
| "rewards/margins": 0.929931640625, | |
| "rewards/rejected": -4.033203125, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.42624, | |
| "grad_norm": 8.767742475858029, | |
| "learning_rate": 4.972062373876435e-07, | |
| "logits/chosen": -0.4326171875, | |
| "logits/rejected": -0.478515625, | |
| "logps/chosen": -643.5, | |
| "logps/rejected": -712.5, | |
| "loss": 0.5134, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.208984375, | |
| "rewards/margins": 0.804443359375, | |
| "rewards/rejected": -4.01171875, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.42752, | |
| "grad_norm": 8.183910291192875, | |
| "learning_rate": 4.95783721381214e-07, | |
| "logits/chosen": -0.564453125, | |
| "logits/rejected": -0.620361328125, | |
| "logps/chosen": -615.5, | |
| "logps/rejected": -733.5, | |
| "loss": 0.4354, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -2.94921875, | |
| "rewards/margins": 1.19970703125, | |
| "rewards/rejected": -4.150390625, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.4288, | |
| "grad_norm": 12.662549180581198, | |
| "learning_rate": 4.943582857043742e-07, | |
| "logits/chosen": -0.531005859375, | |
| "logits/rejected": -0.53173828125, | |
| "logps/chosen": -628.0, | |
| "logps/rejected": -703.0, | |
| "loss": 0.4677, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -2.96484375, | |
| "rewards/margins": 0.94482421875, | |
| "rewards/rejected": -3.9140625, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.43008, | |
| "grad_norm": 11.452856436676567, | |
| "learning_rate": 4.929299589049095e-07, | |
| "logits/chosen": -0.5340576171875, | |
| "logits/rejected": -0.6025390625, | |
| "logps/chosen": -600.0, | |
| "logps/rejected": -719.0, | |
| "loss": 0.4012, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -2.669921875, | |
| "rewards/margins": 1.154296875, | |
| "rewards/rejected": -3.826171875, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.43136, | |
| "grad_norm": 8.22048523281164, | |
| "learning_rate": 4.914987695885067e-07, | |
| "logits/chosen": -0.60498046875, | |
| "logits/rejected": -0.62744140625, | |
| "logps/chosen": -667.0, | |
| "logps/rejected": -755.0, | |
| "loss": 0.483, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -2.97265625, | |
| "rewards/margins": 1.0361328125, | |
| "rewards/rejected": -4.005859375, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.43264, | |
| "grad_norm": 12.225224739430297, | |
| "learning_rate": 4.900647464181817e-07, | |
| "logits/chosen": -0.485107421875, | |
| "logits/rejected": -0.52783203125, | |
| "logps/chosen": -680.5, | |
| "logps/rejected": -767.5, | |
| "loss": 0.5177, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.234375, | |
| "rewards/margins": 1.04296875, | |
| "rewards/rejected": -4.27734375, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.43392, | |
| "grad_norm": 8.757850519576271, | |
| "learning_rate": 4.886279181137049e-07, | |
| "logits/chosen": -0.552490234375, | |
| "logits/rejected": -0.578369140625, | |
| "logps/chosen": -653.0, | |
| "logps/rejected": -717.5, | |
| "loss": 0.5448, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -3.142578125, | |
| "rewards/margins": 0.8095703125, | |
| "rewards/rejected": -3.955078125, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.4352, | |
| "grad_norm": 9.695868477167725, | |
| "learning_rate": 4.871883134510262e-07, | |
| "logits/chosen": -0.494384765625, | |
| "logits/rejected": -0.51611328125, | |
| "logps/chosen": -625.0, | |
| "logps/rejected": -719.5, | |
| "loss": 0.4424, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.021484375, | |
| "rewards/margins": 1.091796875, | |
| "rewards/rejected": -4.111328125, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.43648, | |
| "grad_norm": 8.243809920140835, | |
| "learning_rate": 4.857459612616992e-07, | |
| "logits/chosen": -0.506591796875, | |
| "logits/rejected": -0.54443359375, | |
| "logps/chosen": -581.0, | |
| "logps/rejected": -647.0, | |
| "loss": 0.5348, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -2.822265625, | |
| "rewards/margins": 0.7535400390625, | |
| "rewards/rejected": -3.580078125, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.43776, | |
| "grad_norm": 18.55929104936941, | |
| "learning_rate": 4.843008904323029e-07, | |
| "logits/chosen": -0.501708984375, | |
| "logits/rejected": -0.51806640625, | |
| "logps/chosen": -652.5, | |
| "logps/rejected": -742.0, | |
| "loss": 0.4282, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.1953125, | |
| "rewards/margins": 1.115234375, | |
| "rewards/rejected": -4.31640625, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.43904, | |
| "grad_norm": 10.539994924615243, | |
| "learning_rate": 4.828531299038638e-07, | |
| "logits/chosen": -0.484375, | |
| "logits/rejected": -0.52587890625, | |
| "logps/chosen": -627.0, | |
| "logps/rejected": -710.5, | |
| "loss": 0.5168, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.1171875, | |
| "rewards/margins": 0.879150390625, | |
| "rewards/rejected": -3.99609375, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.44032, | |
| "grad_norm": 14.239765085999206, | |
| "learning_rate": 4.81402708671276e-07, | |
| "logits/chosen": -0.473876953125, | |
| "logits/rejected": -0.54345703125, | |
| "logps/chosen": -616.0, | |
| "logps/rejected": -745.5, | |
| "loss": 0.3896, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -3.083984375, | |
| "rewards/margins": 1.267822265625, | |
| "rewards/rejected": -4.3515625, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.4416, | |
| "grad_norm": 11.762433092658819, | |
| "learning_rate": 4.799496557827208e-07, | |
| "logits/chosen": -0.507568359375, | |
| "logits/rejected": -0.552001953125, | |
| "logps/chosen": -673.0, | |
| "logps/rejected": -818.5, | |
| "loss": 0.4055, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.478515625, | |
| "rewards/margins": 1.3271484375, | |
| "rewards/rejected": -4.80859375, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.44288, | |
| "grad_norm": 13.3714208302887, | |
| "learning_rate": 4.784940003390846e-07, | |
| "logits/chosen": -0.47705078125, | |
| "logits/rejected": -0.4874267578125, | |
| "logps/chosen": -651.0, | |
| "logps/rejected": -722.5, | |
| "loss": 0.484, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.33203125, | |
| "rewards/margins": 0.9072265625, | |
| "rewards/rejected": -4.244140625, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.44416, | |
| "grad_norm": 8.419739675084086, | |
| "learning_rate": 4.770357714933765e-07, | |
| "logits/chosen": -0.3489990234375, | |
| "logits/rejected": -0.399169921875, | |
| "logps/chosen": -715.5, | |
| "logps/rejected": -804.0, | |
| "loss": 0.4466, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.701171875, | |
| "rewards/margins": 1.22509765625, | |
| "rewards/rejected": -4.9296875, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.44544, | |
| "grad_norm": 28.282829493788125, | |
| "learning_rate": 4.7557499845014363e-07, | |
| "logits/chosen": -0.34809112548828125, | |
| "logits/rejected": -0.3986968994140625, | |
| "logps/chosen": -692.0, | |
| "logps/rejected": -785.5, | |
| "loss": 0.5644, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.8125, | |
| "rewards/margins": 1.04931640625, | |
| "rewards/rejected": -4.859375, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.44672, | |
| "grad_norm": 11.217438269564665, | |
| "learning_rate": 4.741117104648874e-07, | |
| "logits/chosen": -0.368408203125, | |
| "logits/rejected": -0.39208984375, | |
| "logps/chosen": -736.5, | |
| "logps/rejected": -841.0, | |
| "loss": 0.4152, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.939453125, | |
| "rewards/margins": 1.28564453125, | |
| "rewards/rejected": -5.2265625, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 9.988120972029757, | |
| "learning_rate": 4.726459368434768e-07, | |
| "logits/chosen": -0.345458984375, | |
| "logits/rejected": -0.3746337890625, | |
| "logps/chosen": -782.5, | |
| "logps/rejected": -898.5, | |
| "loss": 0.4168, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -4.28125, | |
| "rewards/margins": 1.29931640625, | |
| "rewards/rejected": -5.578125, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.44928, | |
| "grad_norm": 27.1774086371912, | |
| "learning_rate": 4.7117770694156146e-07, | |
| "logits/chosen": -0.35791015625, | |
| "logits/rejected": -0.396728515625, | |
| "logps/chosen": -795.0, | |
| "logps/rejected": -910.0, | |
| "loss": 0.5071, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.61328125, | |
| "rewards/margins": 1.2353515625, | |
| "rewards/rejected": -5.84765625, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.45056, | |
| "grad_norm": 18.51059077469931, | |
| "learning_rate": 4.697070501639841e-07, | |
| "logits/chosen": -0.3016357421875, | |
| "logits/rejected": -0.3563232421875, | |
| "logps/chosen": -777.0, | |
| "logps/rejected": -924.0, | |
| "loss": 0.4595, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -4.75390625, | |
| "rewards/margins": 1.1455078125, | |
| "rewards/rejected": -5.90234375, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.45184, | |
| "grad_norm": 33.97653228823091, | |
| "learning_rate": 4.682339959641915e-07, | |
| "logits/chosen": -0.3653564453125, | |
| "logits/rejected": -0.433349609375, | |
| "logps/chosen": -755.0, | |
| "logps/rejected": -877.0, | |
| "loss": 0.505, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -4.537109375, | |
| "rewards/margins": 1.25146484375, | |
| "rewards/rejected": -5.7890625, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.45312, | |
| "grad_norm": 38.200517048662974, | |
| "learning_rate": 4.6675857384364475e-07, | |
| "logits/chosen": -0.3599853515625, | |
| "logits/rejected": -0.429931640625, | |
| "logps/chosen": -783.0, | |
| "logps/rejected": -925.0, | |
| "loss": 0.5449, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.583984375, | |
| "rewards/margins": 1.157470703125, | |
| "rewards/rejected": -5.7421875, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4544, | |
| "grad_norm": 24.734123344344813, | |
| "learning_rate": 4.6528081335122786e-07, | |
| "logits/chosen": -0.34062957763671875, | |
| "logits/rejected": -0.3896484375, | |
| "logps/chosen": -734.5, | |
| "logps/rejected": -849.0, | |
| "loss": 0.522, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.123046875, | |
| "rewards/margins": 1.119140625, | |
| "rewards/rejected": -5.2421875, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.45568, | |
| "grad_norm": 8.65649758872943, | |
| "learning_rate": 4.6380074408265677e-07, | |
| "logits/chosen": -0.3984375, | |
| "logits/rejected": -0.3931884765625, | |
| "logps/chosen": -743.0, | |
| "logps/rejected": -840.0, | |
| "loss": 0.4604, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.982421875, | |
| "rewards/margins": 1.145751953125, | |
| "rewards/rejected": -5.12890625, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.45696, | |
| "grad_norm": 13.642027050493772, | |
| "learning_rate": 4.62318395679886e-07, | |
| "logits/chosen": -0.3692626953125, | |
| "logits/rejected": -0.432861328125, | |
| "logps/chosen": -708.25, | |
| "logps/rejected": -823.5, | |
| "loss": 0.4635, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -3.76171875, | |
| "rewards/margins": 1.205078125, | |
| "rewards/rejected": -4.9609375, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.45824, | |
| "grad_norm": 8.597706559419494, | |
| "learning_rate": 4.608337978305154e-07, | |
| "logits/chosen": -0.404541015625, | |
| "logits/rejected": -0.471435546875, | |
| "logps/chosen": -671.5, | |
| "logps/rejected": -778.5, | |
| "loss": 0.465, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.478515625, | |
| "rewards/margins": 1.0322265625, | |
| "rewards/rejected": -4.515625, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.45952, | |
| "grad_norm": 16.144497938016837, | |
| "learning_rate": 4.593469802671951e-07, | |
| "logits/chosen": -0.454833984375, | |
| "logits/rejected": -0.503173828125, | |
| "logps/chosen": -623.0, | |
| "logps/rejected": -733.5, | |
| "loss": 0.427, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.236328125, | |
| "rewards/margins": 1.34521484375, | |
| "rewards/rejected": -4.58203125, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.4608, | |
| "grad_norm": 15.765499984405258, | |
| "learning_rate": 4.5785797276703074e-07, | |
| "logits/chosen": -0.46826171875, | |
| "logits/rejected": -0.511474609375, | |
| "logps/chosen": -686.5, | |
| "logps/rejected": -804.0, | |
| "loss": 0.5152, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.60546875, | |
| "rewards/margins": 1.046875, | |
| "rewards/rejected": -4.65625, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.46208, | |
| "grad_norm": 8.245124791845544, | |
| "learning_rate": 4.563668051509864e-07, | |
| "logits/chosen": -0.49560546875, | |
| "logits/rejected": -0.5390625, | |
| "logps/chosen": -630.5, | |
| "logps/rejected": -747.5, | |
| "loss": 0.4031, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.111328125, | |
| "rewards/margins": 1.2119140625, | |
| "rewards/rejected": -4.326171875, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.46336, | |
| "grad_norm": 8.559688059984063, | |
| "learning_rate": 4.5487350728328796e-07, | |
| "logits/chosen": -0.4332275390625, | |
| "logits/rejected": -0.4951171875, | |
| "logps/chosen": -630.0, | |
| "logps/rejected": -726.0, | |
| "loss": 0.4651, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.236328125, | |
| "rewards/margins": 1.109375, | |
| "rewards/rejected": -4.34765625, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.46464, | |
| "grad_norm": 10.160592154999383, | |
| "learning_rate": 4.533781090708244e-07, | |
| "logits/chosen": -0.454345703125, | |
| "logits/rejected": -0.46337890625, | |
| "logps/chosen": -671.5, | |
| "logps/rejected": -767.0, | |
| "loss": 0.495, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.326171875, | |
| "rewards/margins": 0.933349609375, | |
| "rewards/rejected": -4.259765625, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.46592, | |
| "grad_norm": 8.364222921089874, | |
| "learning_rate": 4.518806404625495e-07, | |
| "logits/chosen": -0.494140625, | |
| "logits/rejected": -0.537109375, | |
| "logps/chosen": -641.0, | |
| "logps/rejected": -734.5, | |
| "loss": 0.4365, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.1640625, | |
| "rewards/margins": 1.05615234375, | |
| "rewards/rejected": -4.220703125, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4672, | |
| "grad_norm": 9.713400168567283, | |
| "learning_rate": 4.503811314488816e-07, | |
| "logits/chosen": -0.428955078125, | |
| "logits/rejected": -0.471923828125, | |
| "logps/chosen": -652.5, | |
| "logps/rejected": -697.0, | |
| "loss": 0.5314, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -3.28125, | |
| "rewards/margins": 0.78515625, | |
| "rewards/rejected": -4.06640625, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.46848, | |
| "grad_norm": 8.9296942425344, | |
| "learning_rate": 4.488796120611029e-07, | |
| "logits/chosen": -0.44140625, | |
| "logits/rejected": -0.49609375, | |
| "logps/chosen": -610.25, | |
| "logps/rejected": -724.5, | |
| "loss": 0.5128, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.134765625, | |
| "rewards/margins": 1.03662109375, | |
| "rewards/rejected": -4.169921875, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.46976, | |
| "grad_norm": 8.165959460812232, | |
| "learning_rate": 4.4737611237075845e-07, | |
| "logits/chosen": -0.47265625, | |
| "logits/rejected": -0.504150390625, | |
| "logps/chosen": -656.0, | |
| "logps/rejected": -809.5, | |
| "loss": 0.4489, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.306640625, | |
| "rewards/margins": 1.1591796875, | |
| "rewards/rejected": -4.46484375, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.47104, | |
| "grad_norm": 15.893475743625158, | |
| "learning_rate": 4.4587066248905335e-07, | |
| "logits/chosen": -0.407470703125, | |
| "logits/rejected": -0.41796875, | |
| "logps/chosen": -636.0, | |
| "logps/rejected": -754.5, | |
| "loss": 0.4304, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.076171875, | |
| "rewards/margins": 1.13623046875, | |
| "rewards/rejected": -4.212890625, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.47232, | |
| "grad_norm": 12.204585822720064, | |
| "learning_rate": 4.443632925662504e-07, | |
| "logits/chosen": -0.416015625, | |
| "logits/rejected": -0.44873046875, | |
| "logps/chosen": -596.5, | |
| "logps/rejected": -670.0, | |
| "loss": 0.491, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -2.927734375, | |
| "rewards/margins": 0.92626953125, | |
| "rewards/rejected": -3.8515625, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.4736, | |
| "grad_norm": 11.003968270027663, | |
| "learning_rate": 4.4285403279106523e-07, | |
| "logits/chosen": -0.3580322265625, | |
| "logits/rejected": -0.4219970703125, | |
| "logps/chosen": -611.5, | |
| "logps/rejected": -725.5, | |
| "loss": 0.4313, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.126953125, | |
| "rewards/margins": 1.04248046875, | |
| "rewards/rejected": -4.162109375, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.47488, | |
| "grad_norm": 199.6202974665465, | |
| "learning_rate": 4.4134291339006305e-07, | |
| "logits/chosen": -0.3883056640625, | |
| "logits/rejected": -0.38299560546875, | |
| "logps/chosen": -646.5, | |
| "logps/rejected": -762.0, | |
| "loss": 0.5204, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.2890625, | |
| "rewards/margins": 1.00927734375, | |
| "rewards/rejected": -4.296875, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.47616, | |
| "grad_norm": 11.144489957125527, | |
| "learning_rate": 4.3982996462705184e-07, | |
| "logits/chosen": -0.398193359375, | |
| "logits/rejected": -0.4439697265625, | |
| "logps/chosen": -675.0, | |
| "logps/rejected": -765.5, | |
| "loss": 0.4755, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.494140625, | |
| "rewards/margins": 1.0732421875, | |
| "rewards/rejected": -4.5703125, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.47744, | |
| "grad_norm": 12.09850803118018, | |
| "learning_rate": 4.383152168024776e-07, | |
| "logits/chosen": -0.3857421875, | |
| "logits/rejected": -0.430419921875, | |
| "logps/chosen": -658.0, | |
| "logps/rejected": -753.0, | |
| "loss": 0.5137, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.525390625, | |
| "rewards/margins": 0.9542236328125, | |
| "rewards/rejected": -4.482421875, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.47872, | |
| "grad_norm": 10.440049833545846, | |
| "learning_rate": 4.3679870025281645e-07, | |
| "logits/chosen": -0.3140869140625, | |
| "logits/rejected": -0.341064453125, | |
| "logps/chosen": -693.0, | |
| "logps/rejected": -782.0, | |
| "loss": 0.5013, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.654296875, | |
| "rewards/margins": 0.970458984375, | |
| "rewards/rejected": -4.626953125, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 18.264633064403437, | |
| "learning_rate": 4.3528044534996764e-07, | |
| "logits/chosen": -0.296142578125, | |
| "logits/rejected": -0.359619140625, | |
| "logps/chosen": -638.0, | |
| "logps/rejected": -704.5, | |
| "loss": 0.5782, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.236328125, | |
| "rewards/margins": 0.8702392578125, | |
| "rewards/rejected": -4.109375, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.48128, | |
| "grad_norm": 8.417915455120893, | |
| "learning_rate": 4.337604825006452e-07, | |
| "logits/chosen": -0.3731689453125, | |
| "logits/rejected": -0.4154052734375, | |
| "logps/chosen": -638.0, | |
| "logps/rejected": -742.5, | |
| "loss": 0.4726, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.23828125, | |
| "rewards/margins": 1.09521484375, | |
| "rewards/rejected": -4.33203125, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.48256, | |
| "grad_norm": 8.947246273156576, | |
| "learning_rate": 4.3223884214576875e-07, | |
| "logits/chosen": -0.369049072265625, | |
| "logits/rejected": -0.3988037109375, | |
| "logps/chosen": -686.5, | |
| "logps/rejected": -759.5, | |
| "loss": 0.5215, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.53515625, | |
| "rewards/margins": 0.819091796875, | |
| "rewards/rejected": -4.35546875, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.48384, | |
| "grad_norm": 9.885820986822363, | |
| "learning_rate": 4.3071555475985404e-07, | |
| "logits/chosen": -0.347412109375, | |
| "logits/rejected": -0.433349609375, | |
| "logps/chosen": -605.5, | |
| "logps/rejected": -716.5, | |
| "loss": 0.4847, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -3.021484375, | |
| "rewards/margins": 1.01611328125, | |
| "rewards/rejected": -4.0390625, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.48512, | |
| "grad_norm": 8.772239493272059, | |
| "learning_rate": 4.2919065085040284e-07, | |
| "logits/chosen": -0.371337890625, | |
| "logits/rejected": -0.4169921875, | |
| "logps/chosen": -646.0, | |
| "logps/rejected": -746.0, | |
| "loss": 0.4578, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.228515625, | |
| "rewards/margins": 1.074462890625, | |
| "rewards/rejected": -4.298828125, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4864, | |
| "grad_norm": 10.720670863122297, | |
| "learning_rate": 4.2766416095729113e-07, | |
| "logits/chosen": -0.37646484375, | |
| "logits/rejected": -0.43701171875, | |
| "logps/chosen": -662.5, | |
| "logps/rejected": -751.5, | |
| "loss": 0.4435, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.1328125, | |
| "rewards/margins": 1.14599609375, | |
| "rewards/rejected": -4.275390625, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.48768, | |
| "grad_norm": 11.03467333595442, | |
| "learning_rate": 4.261361156521586e-07, | |
| "logits/chosen": -0.458740234375, | |
| "logits/rejected": -0.530517578125, | |
| "logps/chosen": -665.5, | |
| "logps/rejected": -731.0, | |
| "loss": 0.4372, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.1015625, | |
| "rewards/margins": 1.2353515625, | |
| "rewards/rejected": -4.33984375, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.48896, | |
| "grad_norm": 9.450090321989622, | |
| "learning_rate": 4.2460654553779557e-07, | |
| "logits/chosen": -0.43212890625, | |
| "logits/rejected": -0.47119140625, | |
| "logps/chosen": -619.75, | |
| "logps/rejected": -714.0, | |
| "loss": 0.5313, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -3.197265625, | |
| "rewards/margins": 0.80224609375, | |
| "rewards/rejected": -3.99609375, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.49024, | |
| "grad_norm": 14.256426075381693, | |
| "learning_rate": 4.230754812475305e-07, | |
| "logits/chosen": -0.40234375, | |
| "logits/rejected": -0.401611328125, | |
| "logps/chosen": -587.25, | |
| "logps/rejected": -672.0, | |
| "loss": 0.4617, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -2.955078125, | |
| "rewards/margins": 0.952392578125, | |
| "rewards/rejected": -3.90234375, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.49152, | |
| "grad_norm": 16.353106293708784, | |
| "learning_rate": 4.2154295344461614e-07, | |
| "logits/chosen": -0.456787109375, | |
| "logits/rejected": -0.4775390625, | |
| "logps/chosen": -661.0, | |
| "logps/rejected": -743.5, | |
| "loss": 0.4423, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.201171875, | |
| "rewards/margins": 1.04833984375, | |
| "rewards/rejected": -4.251953125, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4928, | |
| "grad_norm": 11.021886637647597, | |
| "learning_rate": 4.2000899282161556e-07, | |
| "logits/chosen": -0.51171875, | |
| "logits/rejected": -0.52880859375, | |
| "logps/chosen": -652.5, | |
| "logps/rejected": -727.5, | |
| "loss": 0.4629, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.22265625, | |
| "rewards/margins": 1.03076171875, | |
| "rewards/rejected": -4.251953125, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.49408, | |
| "grad_norm": 12.461963298467701, | |
| "learning_rate": 4.1847363009978773e-07, | |
| "logits/chosen": -0.3712158203125, | |
| "logits/rejected": -0.412353515625, | |
| "logps/chosen": -608.0, | |
| "logps/rejected": -697.5, | |
| "loss": 0.4655, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.07421875, | |
| "rewards/margins": 1.073974609375, | |
| "rewards/rejected": -4.1484375, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.49536, | |
| "grad_norm": 11.664191303420534, | |
| "learning_rate": 4.169368960284718e-07, | |
| "logits/chosen": -0.2545166015625, | |
| "logits/rejected": -0.374755859375, | |
| "logps/chosen": -602.5, | |
| "logps/rejected": -677.0, | |
| "loss": 0.5172, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.953125, | |
| "rewards/margins": 0.90625, | |
| "rewards/rejected": -3.857421875, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.49664, | |
| "grad_norm": 9.22000344583139, | |
| "learning_rate": 4.1539882138447173e-07, | |
| "logits/chosen": -0.439453125, | |
| "logits/rejected": -0.486328125, | |
| "logps/chosen": -675.0, | |
| "logps/rejected": -789.5, | |
| "loss": 0.437, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.564453125, | |
| "rewards/margins": 1.2265625, | |
| "rewards/rejected": -4.796875, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.49792, | |
| "grad_norm": 10.388005400752823, | |
| "learning_rate": 4.138594369714394e-07, | |
| "logits/chosen": -0.369140625, | |
| "logits/rejected": -0.390625, | |
| "logps/chosen": -634.0, | |
| "logps/rejected": -700.5, | |
| "loss": 0.515, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.412109375, | |
| "rewards/margins": 0.984130859375, | |
| "rewards/rejected": -4.400390625, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.4992, | |
| "grad_norm": 19.773927339862066, | |
| "learning_rate": 4.1231877361925835e-07, | |
| "logits/chosen": -0.27435302734375, | |
| "logits/rejected": -0.29425048828125, | |
| "logps/chosen": -664.0, | |
| "logps/rejected": -725.5, | |
| "loss": 0.5527, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.544921875, | |
| "rewards/margins": 0.83642578125, | |
| "rewards/rejected": -4.37890625, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.50048, | |
| "grad_norm": 26.019664464089015, | |
| "learning_rate": 4.1077686218342565e-07, | |
| "logits/chosen": -0.300048828125, | |
| "logits/rejected": -0.372314453125, | |
| "logps/chosen": -690.5, | |
| "logps/rejected": -777.0, | |
| "loss": 0.6148, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -3.828125, | |
| "rewards/margins": 0.8681640625, | |
| "rewards/rejected": -4.69921875, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.50176, | |
| "grad_norm": 9.566179596760582, | |
| "learning_rate": 4.0923373354443425e-07, | |
| "logits/chosen": -0.34130859375, | |
| "logits/rejected": -0.38330078125, | |
| "logps/chosen": -703.5, | |
| "logps/rejected": -784.0, | |
| "loss": 0.5242, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.611328125, | |
| "rewards/margins": 0.9840087890625, | |
| "rewards/rejected": -4.595703125, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.50304, | |
| "grad_norm": 9.68452388640175, | |
| "learning_rate": 4.076894186071548e-07, | |
| "logits/chosen": -0.363037109375, | |
| "logits/rejected": -0.41796875, | |
| "logps/chosen": -660.0, | |
| "logps/rejected": -764.0, | |
| "loss": 0.4657, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.33984375, | |
| "rewards/margins": 1.11669921875, | |
| "rewards/rejected": -4.45703125, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.50432, | |
| "grad_norm": 12.917842813325114, | |
| "learning_rate": 4.0614394830021604e-07, | |
| "logits/chosen": -0.3944091796875, | |
| "logits/rejected": -0.46484375, | |
| "logps/chosen": -638.5, | |
| "logps/rejected": -742.0, | |
| "loss": 0.4348, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.1484375, | |
| "rewards/margins": 1.259033203125, | |
| "rewards/rejected": -4.41015625, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5056, | |
| "grad_norm": 9.314495046466147, | |
| "learning_rate": 4.0459735357538624e-07, | |
| "logits/chosen": -0.392578125, | |
| "logits/rejected": -0.4384765625, | |
| "logps/chosen": -630.5, | |
| "logps/rejected": -722.0, | |
| "loss": 0.4576, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.1796875, | |
| "rewards/margins": 1.0947265625, | |
| "rewards/rejected": -4.26953125, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.50688, | |
| "grad_norm": 10.547291120299455, | |
| "learning_rate": 4.030496654069524e-07, | |
| "logits/chosen": -0.4755859375, | |
| "logits/rejected": -0.510009765625, | |
| "logps/chosen": -657.0, | |
| "logps/rejected": -736.5, | |
| "loss": 0.4343, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.126953125, | |
| "rewards/margins": 1.056640625, | |
| "rewards/rejected": -4.181640625, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.50816, | |
| "grad_norm": 8.647735781009173, | |
| "learning_rate": 4.0150091479110063e-07, | |
| "logits/chosen": -0.362060546875, | |
| "logits/rejected": -0.4324951171875, | |
| "logps/chosen": -639.0, | |
| "logps/rejected": -751.5, | |
| "loss": 0.4441, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.1640625, | |
| "rewards/margins": 1.125, | |
| "rewards/rejected": -4.291015625, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.50944, | |
| "grad_norm": 8.598126228517861, | |
| "learning_rate": 3.99951132745295e-07, | |
| "logits/chosen": -0.4058837890625, | |
| "logits/rejected": -0.457275390625, | |
| "logps/chosen": -732.0, | |
| "logps/rejected": -845.0, | |
| "loss": 0.4575, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.447265625, | |
| "rewards/margins": 1.125, | |
| "rewards/rejected": -4.57421875, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.51072, | |
| "grad_norm": 8.013248595167934, | |
| "learning_rate": 3.984003503076566e-07, | |
| "logits/chosen": -0.36474609375, | |
| "logits/rejected": -0.416748046875, | |
| "logps/chosen": -674.5, | |
| "logps/rejected": -795.0, | |
| "loss": 0.424, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.474609375, | |
| "rewards/margins": 1.25830078125, | |
| "rewards/rejected": -4.73046875, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 9.871007801580504, | |
| "learning_rate": 3.968485985363416e-07, | |
| "logits/chosen": -0.343994140625, | |
| "logits/rejected": -0.3740234375, | |
| "logps/chosen": -646.5, | |
| "logps/rejected": -742.5, | |
| "loss": 0.4432, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.3203125, | |
| "rewards/margins": 1.1904296875, | |
| "rewards/rejected": -4.5078125, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "eval_logits/chosen": -0.31884765625, | |
| "eval_logits/rejected": -0.39837646484375, | |
| "eval_logps/chosen": -663.0, | |
| "eval_logps/rejected": -743.5, | |
| "eval_loss": 0.49900001287460327, | |
| "eval_rewards/accuracies": 0.741406261920929, | |
| "eval_rewards/chosen": -3.4296875, | |
| "eval_rewards/margins": 0.991943359375, | |
| "eval_rewards/rejected": -4.4228515625, | |
| "eval_runtime": 27.5022, | |
| "eval_samples_per_second": 18.18, | |
| "eval_steps_per_second": 0.582, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.51328, | |
| "grad_norm": 9.918254110325114, | |
| "learning_rate": 3.9529590850891934e-07, | |
| "logits/chosen": -0.31048583984375, | |
| "logits/rejected": -0.33154296875, | |
| "logps/chosen": -675.0, | |
| "logps/rejected": -770.0, | |
| "loss": 0.4547, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.498046875, | |
| "rewards/margins": 1.178955078125, | |
| "rewards/rejected": -4.67578125, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.51456, | |
| "grad_norm": 10.592955302658622, | |
| "learning_rate": 3.9374231132175044e-07, | |
| "logits/chosen": -0.3048095703125, | |
| "logits/rejected": -0.3560791015625, | |
| "logps/chosen": -670.0, | |
| "logps/rejected": -794.5, | |
| "loss": 0.4097, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.53515625, | |
| "rewards/margins": 1.30126953125, | |
| "rewards/rejected": -4.84375, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.51584, | |
| "grad_norm": 9.608106146275887, | |
| "learning_rate": 3.92187838089363e-07, | |
| "logits/chosen": -0.361328125, | |
| "logits/rejected": -0.39404296875, | |
| "logps/chosen": -707.5, | |
| "logps/rejected": -818.5, | |
| "loss": 0.3828, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.662109375, | |
| "rewards/margins": 1.3974609375, | |
| "rewards/rejected": -5.0546875, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.51712, | |
| "grad_norm": 14.34108279812411, | |
| "learning_rate": 3.906325199438306e-07, | |
| "logits/chosen": -0.335693359375, | |
| "logits/rejected": -0.384765625, | |
| "logps/chosen": -711.5, | |
| "logps/rejected": -813.0, | |
| "loss": 0.4382, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.736328125, | |
| "rewards/margins": 1.1923828125, | |
| "rewards/rejected": -4.93359375, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5184, | |
| "grad_norm": 11.139031446712131, | |
| "learning_rate": 3.890763880341477e-07, | |
| "logits/chosen": -0.3443603515625, | |
| "logits/rejected": -0.3773193359375, | |
| "logps/chosen": -662.75, | |
| "logps/rejected": -766.0, | |
| "loss": 0.5007, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.744140625, | |
| "rewards/margins": 1.0185546875, | |
| "rewards/rejected": -4.76953125, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.51968, | |
| "grad_norm": 8.55091082093681, | |
| "learning_rate": 3.875194735256067e-07, | |
| "logits/chosen": -0.3232421875, | |
| "logits/rejected": -0.3831787109375, | |
| "logps/chosen": -675.0, | |
| "logps/rejected": -830.5, | |
| "loss": 0.4086, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.86328125, | |
| "rewards/margins": 1.25634765625, | |
| "rewards/rejected": -5.119140625, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.52096, | |
| "grad_norm": 11.406620827960465, | |
| "learning_rate": 3.859618075991735e-07, | |
| "logits/chosen": -0.28350830078125, | |
| "logits/rejected": -0.31494140625, | |
| "logps/chosen": -730.0, | |
| "logps/rejected": -811.0, | |
| "loss": 0.5632, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -3.947265625, | |
| "rewards/margins": 0.9580078125, | |
| "rewards/rejected": -4.90625, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.52224, | |
| "grad_norm": 11.55895260093868, | |
| "learning_rate": 3.8440342145086245e-07, | |
| "logits/chosen": -0.2462158203125, | |
| "logits/rejected": -0.32208251953125, | |
| "logps/chosen": -710.5, | |
| "logps/rejected": -829.0, | |
| "loss": 0.465, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.90234375, | |
| "rewards/margins": 1.115234375, | |
| "rewards/rejected": -5.01953125, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.52352, | |
| "grad_norm": 9.334623486544036, | |
| "learning_rate": 3.828443462911127e-07, | |
| "logits/chosen": -0.235107421875, | |
| "logits/rejected": -0.2991943359375, | |
| "logps/chosen": -705.0, | |
| "logps/rejected": -859.0, | |
| "loss": 0.4231, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.93359375, | |
| "rewards/margins": 1.3603515625, | |
| "rewards/rejected": -5.2890625, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.5248, | |
| "grad_norm": 9.304030036813096, | |
| "learning_rate": 3.8128461334416223e-07, | |
| "logits/chosen": -0.2989501953125, | |
| "logits/rejected": -0.375732421875, | |
| "logps/chosen": -671.0, | |
| "logps/rejected": -773.0, | |
| "loss": 0.4455, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -3.55078125, | |
| "rewards/margins": 1.16357421875, | |
| "rewards/rejected": -4.71484375, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.52608, | |
| "grad_norm": 21.163435172332214, | |
| "learning_rate": 3.7972425384742267e-07, | |
| "logits/chosen": -0.275146484375, | |
| "logits/rejected": -0.3277587890625, | |
| "logps/chosen": -727.5, | |
| "logps/rejected": -806.0, | |
| "loss": 0.5246, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -3.919921875, | |
| "rewards/margins": 0.976318359375, | |
| "rewards/rejected": -4.89453125, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.52736, | |
| "grad_norm": 15.093249035393466, | |
| "learning_rate": 3.781632990508541e-07, | |
| "logits/chosen": -0.327880859375, | |
| "logits/rejected": -0.344482421875, | |
| "logps/chosen": -738.5, | |
| "logps/rejected": -821.5, | |
| "loss": 0.5157, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.953125, | |
| "rewards/margins": 1.029296875, | |
| "rewards/rejected": -4.98046875, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.52864, | |
| "grad_norm": 8.914765884163025, | |
| "learning_rate": 3.766017802163386e-07, | |
| "logits/chosen": -0.361083984375, | |
| "logits/rejected": -0.388916015625, | |
| "logps/chosen": -689.5, | |
| "logps/rejected": -740.5, | |
| "loss": 0.4922, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.609375, | |
| "rewards/margins": 1.026611328125, | |
| "rewards/rejected": -4.6328125, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.52992, | |
| "grad_norm": 13.842987260589894, | |
| "learning_rate": 3.750397286170548e-07, | |
| "logits/chosen": -0.40478515625, | |
| "logits/rejected": -0.459716796875, | |
| "logps/chosen": -705.5, | |
| "logps/rejected": -816.0, | |
| "loss": 0.4627, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.615234375, | |
| "rewards/margins": 1.1142578125, | |
| "rewards/rejected": -4.734375, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5312, | |
| "grad_norm": 10.861145048772103, | |
| "learning_rate": 3.734771755368508e-07, | |
| "logits/chosen": -0.413818359375, | |
| "logits/rejected": -0.449462890625, | |
| "logps/chosen": -667.0, | |
| "logps/rejected": -737.0, | |
| "loss": 0.4652, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.353515625, | |
| "rewards/margins": 0.997314453125, | |
| "rewards/rejected": -4.3515625, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.53248, | |
| "grad_norm": 10.048437842384944, | |
| "learning_rate": 3.7191415226961866e-07, | |
| "logits/chosen": -0.4249267578125, | |
| "logits/rejected": -0.449951171875, | |
| "logps/chosen": -663.5, | |
| "logps/rejected": -741.0, | |
| "loss": 0.5003, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.177734375, | |
| "rewards/margins": 1.00439453125, | |
| "rewards/rejected": -4.17578125, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.53376, | |
| "grad_norm": 7.560247582517948, | |
| "learning_rate": 3.703506901186665e-07, | |
| "logits/chosen": -0.41876220703125, | |
| "logits/rejected": -0.4765625, | |
| "logps/chosen": -679.0, | |
| "logps/rejected": -779.5, | |
| "loss": 0.4335, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.23046875, | |
| "rewards/margins": 1.267578125, | |
| "rewards/rejected": -4.5, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.53504, | |
| "grad_norm": 10.537078564076927, | |
| "learning_rate": 3.687868203960925e-07, | |
| "logits/chosen": -0.408935546875, | |
| "logits/rejected": -0.434326171875, | |
| "logps/chosen": -634.0, | |
| "logps/rejected": -747.5, | |
| "loss": 0.4401, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.978515625, | |
| "rewards/margins": 1.08984375, | |
| "rewards/rejected": -4.06640625, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.53632, | |
| "grad_norm": 13.668617572855705, | |
| "learning_rate": 3.6722257442215735e-07, | |
| "logits/chosen": -0.4404296875, | |
| "logits/rejected": -0.4970703125, | |
| "logps/chosen": -678.0, | |
| "logps/rejected": -792.5, | |
| "loss": 0.4615, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.37890625, | |
| "rewards/margins": 1.1044921875, | |
| "rewards/rejected": -4.48828125, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.5376, | |
| "grad_norm": 12.228990359857354, | |
| "learning_rate": 3.6565798352465697e-07, | |
| "logits/chosen": -0.485107421875, | |
| "logits/rejected": -0.503173828125, | |
| "logps/chosen": -628.0, | |
| "logps/rejected": -704.5, | |
| "loss": 0.4871, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.03125, | |
| "rewards/margins": 0.918701171875, | |
| "rewards/rejected": -3.947265625, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.53888, | |
| "grad_norm": 18.18437340639748, | |
| "learning_rate": 3.640930790382953e-07, | |
| "logits/chosen": -0.3985595703125, | |
| "logits/rejected": -0.4619140625, | |
| "logps/chosen": -645.0, | |
| "logps/rejected": -726.5, | |
| "loss": 0.4444, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.162109375, | |
| "rewards/margins": 0.968994140625, | |
| "rewards/rejected": -4.130859375, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.54016, | |
| "grad_norm": 11.370178937016053, | |
| "learning_rate": 3.625278923040567e-07, | |
| "logits/chosen": -0.40625, | |
| "logits/rejected": -0.40673828125, | |
| "logps/chosen": -689.5, | |
| "logps/rejected": -748.0, | |
| "loss": 0.519, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.4765625, | |
| "rewards/margins": 0.92724609375, | |
| "rewards/rejected": -4.40234375, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.54144, | |
| "grad_norm": 14.61661184515439, | |
| "learning_rate": 3.6096245466857807e-07, | |
| "logits/chosen": -0.3848876953125, | |
| "logits/rejected": -0.416748046875, | |
| "logps/chosen": -681.0, | |
| "logps/rejected": -737.5, | |
| "loss": 0.4409, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.384765625, | |
| "rewards/margins": 1.01220703125, | |
| "rewards/rejected": -4.396484375, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.54272, | |
| "grad_norm": 12.818023117655002, | |
| "learning_rate": 3.5939679748352143e-07, | |
| "logits/chosen": -0.3614501953125, | |
| "logits/rejected": -0.43359375, | |
| "logps/chosen": -664.0, | |
| "logps/rejected": -786.5, | |
| "loss": 0.4274, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.3046875, | |
| "rewards/margins": 1.12353515625, | |
| "rewards/rejected": -4.431640625, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 12.339337246571736, | |
| "learning_rate": 3.578309521049456e-07, | |
| "logits/chosen": -0.3673095703125, | |
| "logits/rejected": -0.4122314453125, | |
| "logps/chosen": -664.0, | |
| "logps/rejected": -762.5, | |
| "loss": 0.4744, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.564453125, | |
| "rewards/margins": 1.170166015625, | |
| "rewards/rejected": -4.736328125, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.54528, | |
| "grad_norm": 8.658934278329045, | |
| "learning_rate": 3.562649498926785e-07, | |
| "logits/chosen": -0.2666015625, | |
| "logits/rejected": -0.2794189453125, | |
| "logps/chosen": -670.0, | |
| "logps/rejected": -773.5, | |
| "loss": 0.4346, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.388671875, | |
| "rewards/margins": 1.10498046875, | |
| "rewards/rejected": -4.490234375, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.54656, | |
| "grad_norm": 16.97395118117586, | |
| "learning_rate": 3.5469882220968913e-07, | |
| "logits/chosen": -0.334716796875, | |
| "logits/rejected": -0.3743896484375, | |
| "logps/chosen": -627.5, | |
| "logps/rejected": -758.0, | |
| "loss": 0.5011, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.55078125, | |
| "rewards/margins": 1.0, | |
| "rewards/rejected": -4.5546875, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.54784, | |
| "grad_norm": 10.000428326458785, | |
| "learning_rate": 3.531326004214592e-07, | |
| "logits/chosen": -0.234283447265625, | |
| "logits/rejected": -0.29119873046875, | |
| "logps/chosen": -709.0, | |
| "logps/rejected": -835.5, | |
| "loss": 0.4368, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.861328125, | |
| "rewards/margins": 1.24755859375, | |
| "rewards/rejected": -5.10546875, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.54912, | |
| "grad_norm": 17.876551163341734, | |
| "learning_rate": 3.5156631589535516e-07, | |
| "logits/chosen": -0.305419921875, | |
| "logits/rejected": -0.3280029296875, | |
| "logps/chosen": -721.5, | |
| "logps/rejected": -828.0, | |
| "loss": 0.5295, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -4.10546875, | |
| "rewards/margins": 1.10888671875, | |
| "rewards/rejected": -5.21875, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.5504, | |
| "grad_norm": 22.498342188721203, | |
| "learning_rate": 3.5e-07, | |
| "logits/chosen": -0.380859375, | |
| "logits/rejected": -0.43505859375, | |
| "logps/chosen": -739.0, | |
| "logps/rejected": -841.0, | |
| "loss": 0.533, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -4.107421875, | |
| "rewards/margins": 0.91064453125, | |
| "rewards/rejected": -5.015625, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.55168, | |
| "grad_norm": 12.69840485691294, | |
| "learning_rate": 3.484336841046448e-07, | |
| "logits/chosen": -0.28363037109375, | |
| "logits/rejected": -0.329345703125, | |
| "logps/chosen": -718.5, | |
| "logps/rejected": -844.0, | |
| "loss": 0.4312, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.939453125, | |
| "rewards/margins": 1.17919921875, | |
| "rewards/rejected": -5.11328125, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.55296, | |
| "grad_norm": 15.269189564746817, | |
| "learning_rate": 3.468673995785409e-07, | |
| "logits/chosen": -0.3050537109375, | |
| "logits/rejected": -0.2933349609375, | |
| "logps/chosen": -757.5, | |
| "logps/rejected": -809.5, | |
| "loss": 0.5201, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.078125, | |
| "rewards/margins": 1.0419921875, | |
| "rewards/rejected": -5.1171875, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.55424, | |
| "grad_norm": 10.916565840639656, | |
| "learning_rate": 3.4530117779031095e-07, | |
| "logits/chosen": -0.28759765625, | |
| "logits/rejected": -0.30859375, | |
| "logps/chosen": -741.0, | |
| "logps/rejected": -798.5, | |
| "loss": 0.5074, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.970703125, | |
| "rewards/margins": 0.98486328125, | |
| "rewards/rejected": -4.95703125, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.55552, | |
| "grad_norm": 8.956894754018172, | |
| "learning_rate": 3.4373505010732153e-07, | |
| "logits/chosen": -0.336181640625, | |
| "logits/rejected": -0.366455078125, | |
| "logps/chosen": -745.0, | |
| "logps/rejected": -831.0, | |
| "loss": 0.4109, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.966796875, | |
| "rewards/margins": 1.26806640625, | |
| "rewards/rejected": -5.23046875, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5568, | |
| "grad_norm": 10.251791767078773, | |
| "learning_rate": 3.4216904789505446e-07, | |
| "logits/chosen": -0.2631034851074219, | |
| "logits/rejected": -0.291534423828125, | |
| "logps/chosen": -701.5, | |
| "logps/rejected": -824.0, | |
| "loss": 0.4353, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.8203125, | |
| "rewards/margins": 1.3251953125, | |
| "rewards/rejected": -5.14453125, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.55808, | |
| "grad_norm": 17.42176040192877, | |
| "learning_rate": 3.4060320251647864e-07, | |
| "logits/chosen": -0.309478759765625, | |
| "logits/rejected": -0.3680419921875, | |
| "logps/chosen": -712.5, | |
| "logps/rejected": -840.5, | |
| "loss": 0.4913, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.96484375, | |
| "rewards/margins": 1.13525390625, | |
| "rewards/rejected": -5.10546875, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.55936, | |
| "grad_norm": 8.54035065196297, | |
| "learning_rate": 3.3903754533142195e-07, | |
| "logits/chosen": -0.256591796875, | |
| "logits/rejected": -0.332275390625, | |
| "logps/chosen": -719.0, | |
| "logps/rejected": -851.0, | |
| "loss": 0.4279, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.943359375, | |
| "rewards/margins": 1.35791015625, | |
| "rewards/rejected": -5.30078125, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.56064, | |
| "grad_norm": 8.537378854753193, | |
| "learning_rate": 3.3747210769594327e-07, | |
| "logits/chosen": -0.25079345703125, | |
| "logits/rejected": -0.331298828125, | |
| "logps/chosen": -699.0, | |
| "logps/rejected": -829.0, | |
| "loss": 0.4387, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.8828125, | |
| "rewards/margins": 1.21044921875, | |
| "rewards/rejected": -5.08984375, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.56192, | |
| "grad_norm": 9.144351590625533, | |
| "learning_rate": 3.359069209617048e-07, | |
| "logits/chosen": -0.314208984375, | |
| "logits/rejected": -0.3914337158203125, | |
| "logps/chosen": -697.0, | |
| "logps/rejected": -799.0, | |
| "loss": 0.4947, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.8203125, | |
| "rewards/margins": 1.109375, | |
| "rewards/rejected": -4.9296875, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.5632, | |
| "grad_norm": 14.655918977438441, | |
| "learning_rate": 3.3434201647534305e-07, | |
| "logits/chosen": -0.3173828125, | |
| "logits/rejected": -0.363525390625, | |
| "logps/chosen": -686.0, | |
| "logps/rejected": -800.5, | |
| "loss": 0.4074, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.587890625, | |
| "rewards/margins": 1.189453125, | |
| "rewards/rejected": -4.76953125, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.56448, | |
| "grad_norm": 9.37268098222402, | |
| "learning_rate": 3.327774255778426e-07, | |
| "logits/chosen": -0.35009765625, | |
| "logits/rejected": -0.3543701171875, | |
| "logps/chosen": -706.0, | |
| "logps/rejected": -837.0, | |
| "loss": 0.4304, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.71875, | |
| "rewards/margins": 1.23095703125, | |
| "rewards/rejected": -4.94921875, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.56576, | |
| "grad_norm": 8.893240235048026, | |
| "learning_rate": 3.312131796039074e-07, | |
| "logits/chosen": -0.3359375, | |
| "logits/rejected": -0.35595703125, | |
| "logps/chosen": -720.0, | |
| "logps/rejected": -820.5, | |
| "loss": 0.4758, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.982421875, | |
| "rewards/margins": 1.11669921875, | |
| "rewards/rejected": -5.10546875, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.56704, | |
| "grad_norm": 9.905678959597498, | |
| "learning_rate": 3.2964930988133347e-07, | |
| "logits/chosen": -0.31103515625, | |
| "logits/rejected": -0.3660888671875, | |
| "logps/chosen": -677.5, | |
| "logps/rejected": -814.5, | |
| "loss": 0.498, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.58984375, | |
| "rewards/margins": 1.150390625, | |
| "rewards/rejected": -4.73828125, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.56832, | |
| "grad_norm": 9.08992784313469, | |
| "learning_rate": 3.280858477303813e-07, | |
| "logits/chosen": -0.39501953125, | |
| "logits/rejected": -0.420166015625, | |
| "logps/chosen": -693.0, | |
| "logps/rejected": -835.5, | |
| "loss": 0.4553, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -3.744140625, | |
| "rewards/margins": 1.2099609375, | |
| "rewards/rejected": -4.94921875, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5696, | |
| "grad_norm": 11.048886602570247, | |
| "learning_rate": 3.265228244631491e-07, | |
| "logits/chosen": -0.3572998046875, | |
| "logits/rejected": -0.3760986328125, | |
| "logps/chosen": -655.0, | |
| "logps/rejected": -766.5, | |
| "loss": 0.4808, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.58203125, | |
| "rewards/margins": 0.9876708984375, | |
| "rewards/rejected": -4.57421875, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.57088, | |
| "grad_norm": 16.49283952772338, | |
| "learning_rate": 3.2496027138294534e-07, | |
| "logits/chosen": -0.3392333984375, | |
| "logits/rejected": -0.38818359375, | |
| "logps/chosen": -695.5, | |
| "logps/rejected": -786.0, | |
| "loss": 0.5221, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.708984375, | |
| "rewards/margins": 1.129638671875, | |
| "rewards/rejected": -4.84375, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.57216, | |
| "grad_norm": 13.865402250549591, | |
| "learning_rate": 3.2339821978366143e-07, | |
| "logits/chosen": -0.3529052734375, | |
| "logits/rejected": -0.415283203125, | |
| "logps/chosen": -705.5, | |
| "logps/rejected": -801.0, | |
| "loss": 0.414, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.7890625, | |
| "rewards/margins": 1.2333984375, | |
| "rewards/rejected": -5.015625, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.57344, | |
| "grad_norm": 9.732935361331597, | |
| "learning_rate": 3.218367009491459e-07, | |
| "logits/chosen": -0.30615234375, | |
| "logits/rejected": -0.3333740234375, | |
| "logps/chosen": -690.0, | |
| "logps/rejected": -790.5, | |
| "loss": 0.5015, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.7265625, | |
| "rewards/margins": 1.05517578125, | |
| "rewards/rejected": -4.78125, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.57472, | |
| "grad_norm": 9.154715837042305, | |
| "learning_rate": 3.2027574615257724e-07, | |
| "logits/chosen": -0.3072509765625, | |
| "logits/rejected": -0.3399658203125, | |
| "logps/chosen": -722.5, | |
| "logps/rejected": -808.0, | |
| "loss": 0.4555, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.783203125, | |
| "rewards/margins": 1.1298828125, | |
| "rewards/rejected": -4.91015625, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 8.928902784266844, | |
| "learning_rate": 3.1871538665583784e-07, | |
| "logits/chosen": -0.33447265625, | |
| "logits/rejected": -0.3792724609375, | |
| "logps/chosen": -701.0, | |
| "logps/rejected": -844.5, | |
| "loss": 0.439, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.796875, | |
| "rewards/margins": 1.2041015625, | |
| "rewards/rejected": -5.00390625, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.57728, | |
| "grad_norm": 12.00098250688527, | |
| "learning_rate": 3.1715565370888724e-07, | |
| "logits/chosen": -0.29534912109375, | |
| "logits/rejected": -0.3519287109375, | |
| "logps/chosen": -685.0, | |
| "logps/rejected": -775.0, | |
| "loss": 0.5097, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.541015625, | |
| "rewards/margins": 1.02294921875, | |
| "rewards/rejected": -4.5625, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.57856, | |
| "grad_norm": 12.6273655314952, | |
| "learning_rate": 3.155965785491375e-07, | |
| "logits/chosen": -0.2778053283691406, | |
| "logits/rejected": -0.3127593994140625, | |
| "logps/chosen": -666.5, | |
| "logps/rejected": -733.0, | |
| "loss": 0.5567, | |
| "rewards/accuracies": 0.6171875, | |
| "rewards/chosen": -3.607421875, | |
| "rewards/margins": 0.78857421875, | |
| "rewards/rejected": -4.392578125, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.57984, | |
| "grad_norm": 15.49253693464398, | |
| "learning_rate": 3.140381924008266e-07, | |
| "logits/chosen": -0.3681640625, | |
| "logits/rejected": -0.415771484375, | |
| "logps/chosen": -679.5, | |
| "logps/rejected": -758.5, | |
| "loss": 0.5302, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.544921875, | |
| "rewards/margins": 1.06982421875, | |
| "rewards/rejected": -4.61328125, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.58112, | |
| "grad_norm": 14.072242757167011, | |
| "learning_rate": 3.1248052647439325e-07, | |
| "logits/chosen": -0.314361572265625, | |
| "logits/rejected": -0.35498046875, | |
| "logps/chosen": -683.0, | |
| "logps/rejected": -766.5, | |
| "loss": 0.4328, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.375, | |
| "rewards/margins": 1.099609375, | |
| "rewards/rejected": -4.47265625, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5824, | |
| "grad_norm": 8.998262402466967, | |
| "learning_rate": 3.109236119658523e-07, | |
| "logits/chosen": -0.39013671875, | |
| "logits/rejected": -0.46435546875, | |
| "logps/chosen": -694.0, | |
| "logps/rejected": -787.0, | |
| "loss": 0.4334, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.5234375, | |
| "rewards/margins": 1.29052734375, | |
| "rewards/rejected": -4.81640625, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.58368, | |
| "grad_norm": 9.654473976654984, | |
| "learning_rate": 3.0936748005616934e-07, | |
| "logits/chosen": -0.4014892578125, | |
| "logits/rejected": -0.4482421875, | |
| "logps/chosen": -652.5, | |
| "logps/rejected": -773.0, | |
| "loss": 0.4763, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -3.390625, | |
| "rewards/margins": 1.03076171875, | |
| "rewards/rejected": -4.41796875, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.58496, | |
| "grad_norm": 9.304597500860096, | |
| "learning_rate": 3.07812161910637e-07, | |
| "logits/chosen": -0.38525390625, | |
| "logits/rejected": -0.466064453125, | |
| "logps/chosen": -677.0, | |
| "logps/rejected": -786.5, | |
| "loss": 0.4882, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.505859375, | |
| "rewards/margins": 1.04443359375, | |
| "rewards/rejected": -4.55078125, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.58624, | |
| "grad_norm": 10.03299414048815, | |
| "learning_rate": 3.062576886782496e-07, | |
| "logits/chosen": -0.33111572265625, | |
| "logits/rejected": -0.343994140625, | |
| "logps/chosen": -669.0, | |
| "logps/rejected": -749.0, | |
| "loss": 0.4664, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -3.41796875, | |
| "rewards/margins": 1.0966796875, | |
| "rewards/rejected": -4.51953125, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.58752, | |
| "grad_norm": 8.481068595485672, | |
| "learning_rate": 3.0470409149108057e-07, | |
| "logits/chosen": -0.376220703125, | |
| "logits/rejected": -0.4453125, | |
| "logps/chosen": -693.5, | |
| "logps/rejected": -784.5, | |
| "loss": 0.4547, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.419921875, | |
| "rewards/margins": 1.06201171875, | |
| "rewards/rejected": -4.478515625, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5888, | |
| "grad_norm": 12.321469175204998, | |
| "learning_rate": 3.0315140146365854e-07, | |
| "logits/chosen": -0.338134765625, | |
| "logits/rejected": -0.415283203125, | |
| "logps/chosen": -632.5, | |
| "logps/rejected": -762.0, | |
| "loss": 0.4179, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.32421875, | |
| "rewards/margins": 1.26171875, | |
| "rewards/rejected": -4.5859375, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.59008, | |
| "grad_norm": 10.17822938822944, | |
| "learning_rate": 3.0159964969234345e-07, | |
| "logits/chosen": -0.31658935546875, | |
| "logits/rejected": -0.399658203125, | |
| "logps/chosen": -660.0, | |
| "logps/rejected": -803.5, | |
| "loss": 0.4285, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.5390625, | |
| "rewards/margins": 1.218505859375, | |
| "rewards/rejected": -4.75390625, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.59136, | |
| "grad_norm": 7.7993467189482075, | |
| "learning_rate": 3.00048867254705e-07, | |
| "logits/chosen": -0.43994140625, | |
| "logits/rejected": -0.50244140625, | |
| "logps/chosen": -688.0, | |
| "logps/rejected": -806.0, | |
| "loss": 0.4446, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.525390625, | |
| "rewards/margins": 1.19580078125, | |
| "rewards/rejected": -4.71875, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.59264, | |
| "grad_norm": 12.021053759681038, | |
| "learning_rate": 2.9849908520889934e-07, | |
| "logits/chosen": -0.35205078125, | |
| "logits/rejected": -0.4013671875, | |
| "logps/chosen": -703.5, | |
| "logps/rejected": -813.0, | |
| "loss": 0.4805, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.78515625, | |
| "rewards/margins": 1.2294921875, | |
| "rewards/rejected": -5.015625, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.59392, | |
| "grad_norm": 9.606440908673006, | |
| "learning_rate": 2.9695033459304765e-07, | |
| "logits/chosen": -0.3104248046875, | |
| "logits/rejected": -0.3370361328125, | |
| "logps/chosen": -719.5, | |
| "logps/rejected": -809.5, | |
| "loss": 0.4665, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.779296875, | |
| "rewards/margins": 1.021728515625, | |
| "rewards/rejected": -4.80078125, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5952, | |
| "grad_norm": 9.429466706631539, | |
| "learning_rate": 2.954026464246138e-07, | |
| "logits/chosen": -0.308349609375, | |
| "logits/rejected": -0.3970947265625, | |
| "logps/chosen": -685.0, | |
| "logps/rejected": -803.5, | |
| "loss": 0.4333, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.5859375, | |
| "rewards/margins": 1.34521484375, | |
| "rewards/rejected": -4.93359375, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.59648, | |
| "grad_norm": 8.733380658733111, | |
| "learning_rate": 2.938560516997839e-07, | |
| "logits/chosen": -0.3392333984375, | |
| "logits/rejected": -0.385986328125, | |
| "logps/chosen": -679.0, | |
| "logps/rejected": -788.0, | |
| "loss": 0.4456, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.580078125, | |
| "rewards/margins": 1.158203125, | |
| "rewards/rejected": -4.732421875, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.59776, | |
| "grad_norm": 11.051837804187693, | |
| "learning_rate": 2.923105813928453e-07, | |
| "logits/chosen": -0.35302734375, | |
| "logits/rejected": -0.400390625, | |
| "logps/chosen": -681.5, | |
| "logps/rejected": -801.0, | |
| "loss": 0.4298, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.771484375, | |
| "rewards/margins": 1.23046875, | |
| "rewards/rejected": -5.00390625, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.59904, | |
| "grad_norm": 10.90589653843686, | |
| "learning_rate": 2.907662664555658e-07, | |
| "logits/chosen": -0.24847412109375, | |
| "logits/rejected": -0.321044921875, | |
| "logps/chosen": -716.5, | |
| "logps/rejected": -895.5, | |
| "loss": 0.4646, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.005859375, | |
| "rewards/margins": 1.11767578125, | |
| "rewards/rejected": -5.125, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.60032, | |
| "grad_norm": 11.37286435597706, | |
| "learning_rate": 2.8922313781657437e-07, | |
| "logits/chosen": -0.258544921875, | |
| "logits/rejected": -0.29833984375, | |
| "logps/chosen": -707.0, | |
| "logps/rejected": -853.0, | |
| "loss": 0.4346, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.875, | |
| "rewards/margins": 1.4248046875, | |
| "rewards/rejected": -5.302734375, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.6016, | |
| "grad_norm": 12.238759131932019, | |
| "learning_rate": 2.876812263807417e-07, | |
| "logits/chosen": -0.31640625, | |
| "logits/rejected": -0.36279296875, | |
| "logps/chosen": -757.0, | |
| "logps/rejected": -893.5, | |
| "loss": 0.3726, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -4.361328125, | |
| "rewards/margins": 1.45849609375, | |
| "rewards/rejected": -5.8125, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.60288, | |
| "grad_norm": 14.41231269695093, | |
| "learning_rate": 2.861405630285606e-07, | |
| "logits/chosen": -0.203125, | |
| "logits/rejected": -0.233642578125, | |
| "logps/chosen": -747.0, | |
| "logps/rejected": -901.5, | |
| "loss": 0.3898, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -3.962890625, | |
| "rewards/margins": 1.37841796875, | |
| "rewards/rejected": -5.33984375, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.60416, | |
| "grad_norm": 10.17467437176661, | |
| "learning_rate": 2.8460117861552834e-07, | |
| "logits/chosen": -0.1627197265625, | |
| "logits/rejected": -0.205810546875, | |
| "logps/chosen": -750.5, | |
| "logps/rejected": -889.5, | |
| "loss": 0.4308, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -4.59375, | |
| "rewards/margins": 1.18017578125, | |
| "rewards/rejected": -5.77734375, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.60544, | |
| "grad_norm": 18.928934732354712, | |
| "learning_rate": 2.8306310397152813e-07, | |
| "logits/chosen": -0.1800537109375, | |
| "logits/rejected": -0.21759796142578125, | |
| "logps/chosen": -792.5, | |
| "logps/rejected": -936.0, | |
| "loss": 0.4427, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -4.73828125, | |
| "rewards/margins": 1.591796875, | |
| "rewards/rejected": -6.33203125, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.60672, | |
| "grad_norm": 22.570954000361684, | |
| "learning_rate": 2.815263699002124e-07, | |
| "logits/chosen": -0.104888916015625, | |
| "logits/rejected": -0.14886474609375, | |
| "logps/chosen": -865.0, | |
| "logps/rejected": -989.0, | |
| "loss": 0.4815, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -5.34765625, | |
| "rewards/margins": 1.2919921875, | |
| "rewards/rejected": -6.640625, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 11.072003778958646, | |
| "learning_rate": 2.799910071783845e-07, | |
| "logits/chosen": -0.0941619873046875, | |
| "logits/rejected": -0.12345123291015625, | |
| "logps/chosen": -875.0, | |
| "logps/rejected": -1017.5, | |
| "loss": 0.4723, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -5.3984375, | |
| "rewards/margins": 1.45703125, | |
| "rewards/rejected": -6.8515625, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.60928, | |
| "grad_norm": 22.291924161435414, | |
| "learning_rate": 2.7845704655538383e-07, | |
| "logits/chosen": -0.145660400390625, | |
| "logits/rejected": -0.185394287109375, | |
| "logps/chosen": -858.5, | |
| "logps/rejected": -1001.5, | |
| "loss": 0.5071, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -5.328125, | |
| "rewards/margins": 1.337890625, | |
| "rewards/rejected": -6.66796875, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.61056, | |
| "grad_norm": 49.81448523508282, | |
| "learning_rate": 2.7692451875246955e-07, | |
| "logits/chosen": -0.1240692138671875, | |
| "logits/rejected": -0.1556243896484375, | |
| "logps/chosen": -825.5, | |
| "logps/rejected": -936.0, | |
| "loss": 0.637, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -5.234375, | |
| "rewards/margins": 1.135498046875, | |
| "rewards/rejected": -6.375, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.61184, | |
| "grad_norm": 13.559544047897164, | |
| "learning_rate": 2.753934544622044e-07, | |
| "logits/chosen": -0.0382232666015625, | |
| "logits/rejected": -0.0726776123046875, | |
| "logps/chosen": -860.0, | |
| "logps/rejected": -959.5, | |
| "loss": 0.5071, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -5.5390625, | |
| "rewards/margins": 1.2286376953125, | |
| "rewards/rejected": -6.765625, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.61312, | |
| "grad_norm": 44.498812483306665, | |
| "learning_rate": 2.7386388434784144e-07, | |
| "logits/chosen": -0.1600341796875, | |
| "logits/rejected": -0.229248046875, | |
| "logps/chosen": -800.0, | |
| "logps/rejected": -891.0, | |
| "loss": 0.541, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.859375, | |
| "rewards/margins": 1.0419921875, | |
| "rewards/rejected": -5.90234375, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.6144, | |
| "grad_norm": 10.090939267052912, | |
| "learning_rate": 2.723358390427089e-07, | |
| "logits/chosen": -0.07647705078125, | |
| "logits/rejected": -0.1278076171875, | |
| "logps/chosen": -830.0, | |
| "logps/rejected": -963.0, | |
| "loss": 0.3957, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -5.05078125, | |
| "rewards/margins": 1.451171875, | |
| "rewards/rejected": -6.49609375, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.61568, | |
| "grad_norm": 36.24985599924041, | |
| "learning_rate": 2.708093491495973e-07, | |
| "logits/chosen": -0.191162109375, | |
| "logits/rejected": -0.2340087890625, | |
| "logps/chosen": -795.0, | |
| "logps/rejected": -943.5, | |
| "loss": 0.5742, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -4.89453125, | |
| "rewards/margins": 1.2020416259765625, | |
| "rewards/rejected": -6.09375, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.61696, | |
| "grad_norm": 9.559993471114256, | |
| "learning_rate": 2.6928444524014593e-07, | |
| "logits/chosen": -0.0987396240234375, | |
| "logits/rejected": -0.13189697265625, | |
| "logps/chosen": -821.5, | |
| "logps/rejected": -962.0, | |
| "loss": 0.4207, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -4.91015625, | |
| "rewards/margins": 1.5185546875, | |
| "rewards/rejected": -6.4296875, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.61824, | |
| "grad_norm": 21.064286757896273, | |
| "learning_rate": 2.677611578542312e-07, | |
| "logits/chosen": -0.180572509765625, | |
| "logits/rejected": -0.177886962890625, | |
| "logps/chosen": -761.5, | |
| "logps/rejected": -863.5, | |
| "loss": 0.518, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.3125, | |
| "rewards/margins": 1.28662109375, | |
| "rewards/rejected": -5.59765625, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.61952, | |
| "grad_norm": 16.47721480533661, | |
| "learning_rate": 2.6623951749935486e-07, | |
| "logits/chosen": -0.19525146484375, | |
| "logits/rejected": -0.232696533203125, | |
| "logps/chosen": -750.5, | |
| "logps/rejected": -880.0, | |
| "loss": 0.5357, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -4.375, | |
| "rewards/margins": 1.01416015625, | |
| "rewards/rejected": -5.39453125, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.6208, | |
| "grad_norm": 14.729444247013049, | |
| "learning_rate": 2.6471955465003233e-07, | |
| "logits/chosen": -0.146209716796875, | |
| "logits/rejected": -0.181396484375, | |
| "logps/chosen": -727.0, | |
| "logps/rejected": -855.5, | |
| "loss": 0.4494, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.181640625, | |
| "rewards/margins": 1.361328125, | |
| "rewards/rejected": -5.54296875, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.62208, | |
| "grad_norm": 17.822317812146366, | |
| "learning_rate": 2.6320129974718357e-07, | |
| "logits/chosen": -0.2357177734375, | |
| "logits/rejected": -0.266845703125, | |
| "logps/chosen": -755.5, | |
| "logps/rejected": -898.5, | |
| "loss": 0.3869, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -4.2265625, | |
| "rewards/margins": 1.5546875, | |
| "rewards/rejected": -5.78125, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.62336, | |
| "grad_norm": 18.035398000203894, | |
| "learning_rate": 2.6168478319752235e-07, | |
| "logits/chosen": -0.25274658203125, | |
| "logits/rejected": -0.2952880859375, | |
| "logps/chosen": -715.5, | |
| "logps/rejected": -808.5, | |
| "loss": 0.53, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.82421875, | |
| "rewards/margins": 0.89306640625, | |
| "rewards/rejected": -4.7109375, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.62464, | |
| "grad_norm": 11.523996728652335, | |
| "learning_rate": 2.6017003537294813e-07, | |
| "logits/chosen": -0.28271484375, | |
| "logits/rejected": -0.321533203125, | |
| "logps/chosen": -650.0, | |
| "logps/rejected": -767.5, | |
| "loss": 0.4265, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.513671875, | |
| "rewards/margins": 1.33837890625, | |
| "rewards/rejected": -4.85546875, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.62592, | |
| "grad_norm": 9.3216241550967, | |
| "learning_rate": 2.58657086609937e-07, | |
| "logits/chosen": -0.29815673828125, | |
| "logits/rejected": -0.323974609375, | |
| "logps/chosen": -692.25, | |
| "logps/rejected": -793.5, | |
| "loss": 0.4857, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.697265625, | |
| "rewards/margins": 0.97802734375, | |
| "rewards/rejected": -4.673828125, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.6272, | |
| "grad_norm": 9.12204855358185, | |
| "learning_rate": 2.5714596720893474e-07, | |
| "logits/chosen": -0.1910400390625, | |
| "logits/rejected": -0.24755859375, | |
| "logps/chosen": -638.0, | |
| "logps/rejected": -753.0, | |
| "loss": 0.49, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.484375, | |
| "rewards/margins": 1.13525390625, | |
| "rewards/rejected": -4.62109375, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.62848, | |
| "grad_norm": 9.031432208381775, | |
| "learning_rate": 2.5563670743374974e-07, | |
| "logits/chosen": -0.311279296875, | |
| "logits/rejected": -0.331787109375, | |
| "logps/chosen": -661.0, | |
| "logps/rejected": -733.5, | |
| "loss": 0.4686, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.4296875, | |
| "rewards/margins": 1.078125, | |
| "rewards/rejected": -4.509765625, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.62976, | |
| "grad_norm": 11.835986491275541, | |
| "learning_rate": 2.541293375109466e-07, | |
| "logits/chosen": -0.323974609375, | |
| "logits/rejected": -0.363525390625, | |
| "logps/chosen": -632.5, | |
| "logps/rejected": -759.0, | |
| "loss": 0.4363, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.412109375, | |
| "rewards/margins": 1.064453125, | |
| "rewards/rejected": -4.4765625, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.63104, | |
| "grad_norm": 8.263522495741764, | |
| "learning_rate": 2.5262388762924157e-07, | |
| "logits/chosen": -0.341064453125, | |
| "logits/rejected": -0.380859375, | |
| "logps/chosen": -661.5, | |
| "logps/rejected": -789.5, | |
| "loss": 0.401, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.357421875, | |
| "rewards/margins": 1.31689453125, | |
| "rewards/rejected": -4.671875, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.63232, | |
| "grad_norm": 16.717553631249775, | |
| "learning_rate": 2.511203879388971e-07, | |
| "logits/chosen": -0.28839111328125, | |
| "logits/rejected": -0.35125732421875, | |
| "logps/chosen": -655.5, | |
| "logps/rejected": -778.0, | |
| "loss": 0.399, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.291015625, | |
| "rewards/margins": 1.16796875, | |
| "rewards/rejected": -4.45703125, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6336, | |
| "grad_norm": 9.39340513518324, | |
| "learning_rate": 2.496188685511185e-07, | |
| "logits/chosen": -0.27813720703125, | |
| "logits/rejected": -0.288360595703125, | |
| "logps/chosen": -666.5, | |
| "logps/rejected": -738.0, | |
| "loss": 0.4937, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.365234375, | |
| "rewards/margins": 0.90673828125, | |
| "rewards/rejected": -4.26953125, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.63488, | |
| "grad_norm": 8.21962622555432, | |
| "learning_rate": 2.481193595374505e-07, | |
| "logits/chosen": -0.22674560546875, | |
| "logits/rejected": -0.27642822265625, | |
| "logps/chosen": -603.75, | |
| "logps/rejected": -724.0, | |
| "loss": 0.478, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.115234375, | |
| "rewards/margins": 1.0169677734375, | |
| "rewards/rejected": -4.12890625, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.63616, | |
| "grad_norm": 10.565279172163, | |
| "learning_rate": 2.466218909291756e-07, | |
| "logits/chosen": -0.32861328125, | |
| "logits/rejected": -0.385009765625, | |
| "logps/chosen": -659.5, | |
| "logps/rejected": -752.5, | |
| "loss": 0.494, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.30078125, | |
| "rewards/margins": 1.0400390625, | |
| "rewards/rejected": -4.337890625, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.63744, | |
| "grad_norm": 14.800631609015028, | |
| "learning_rate": 2.451264927167121e-07, | |
| "logits/chosen": -0.378173828125, | |
| "logits/rejected": -0.388671875, | |
| "logps/chosen": -676.5, | |
| "logps/rejected": -738.5, | |
| "loss": 0.5109, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.45703125, | |
| "rewards/margins": 0.9403076171875, | |
| "rewards/rejected": -4.396484375, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.63872, | |
| "grad_norm": 14.511150427574877, | |
| "learning_rate": 2.436331948490136e-07, | |
| "logits/chosen": -0.3680419921875, | |
| "logits/rejected": -0.44677734375, | |
| "logps/chosen": -638.5, | |
| "logps/rejected": -751.0, | |
| "loss": 0.5025, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.4140625, | |
| "rewards/margins": 1.0439453125, | |
| "rewards/rejected": -4.453125, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 9.963559206561316, | |
| "learning_rate": 2.4214202723296923e-07, | |
| "logits/chosen": -0.3682861328125, | |
| "logits/rejected": -0.392578125, | |
| "logps/chosen": -666.5, | |
| "logps/rejected": -715.0, | |
| "loss": 0.4913, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.439453125, | |
| "rewards/margins": 0.968505859375, | |
| "rewards/rejected": -4.41015625, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_logits/chosen": -0.30975341796875, | |
| "eval_logits/rejected": -0.39111328125, | |
| "eval_logps/chosen": -649.75, | |
| "eval_logps/rejected": -732.0, | |
| "eval_loss": 0.4821406304836273, | |
| "eval_rewards/accuracies": 0.737500011920929, | |
| "eval_rewards/chosen": -3.2939453125, | |
| "eval_rewards/margins": 1.017822265625, | |
| "eval_rewards/rejected": -4.3095703125, | |
| "eval_runtime": 27.6376, | |
| "eval_samples_per_second": 18.091, | |
| "eval_steps_per_second": 0.579, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.64128, | |
| "grad_norm": 9.125881510784152, | |
| "learning_rate": 2.4065301973280486e-07, | |
| "logits/chosen": -0.3564453125, | |
| "logits/rejected": -0.4180908203125, | |
| "logps/chosen": -678.5, | |
| "logps/rejected": -829.5, | |
| "loss": 0.4451, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.44921875, | |
| "rewards/margins": 1.13232421875, | |
| "rewards/rejected": -4.58203125, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.64256, | |
| "grad_norm": 17.366268825982257, | |
| "learning_rate": 2.391662021694847e-07, | |
| "logits/chosen": -0.3870849609375, | |
| "logits/rejected": -0.4088134765625, | |
| "logps/chosen": -630.5, | |
| "logps/rejected": -714.0, | |
| "loss": 0.5572, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.23828125, | |
| "rewards/margins": 0.932861328125, | |
| "rewards/rejected": -4.16796875, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.64384, | |
| "grad_norm": 15.66439643538157, | |
| "learning_rate": 2.3768160432011394e-07, | |
| "logits/chosen": -0.332275390625, | |
| "logits/rejected": -0.3670654296875, | |
| "logps/chosen": -644.5, | |
| "logps/rejected": -734.5, | |
| "loss": 0.4228, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -2.994140625, | |
| "rewards/margins": 1.0615234375, | |
| "rewards/rejected": -4.0546875, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.64512, | |
| "grad_norm": 13.777723328035924, | |
| "learning_rate": 2.361992559173432e-07, | |
| "logits/chosen": -0.399169921875, | |
| "logits/rejected": -0.425537109375, | |
| "logps/chosen": -669.5, | |
| "logps/rejected": -773.5, | |
| "loss": 0.4306, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.2421875, | |
| "rewards/margins": 1.294921875, | |
| "rewards/rejected": -4.53515625, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6464, | |
| "grad_norm": 9.719276284192894, | |
| "learning_rate": 2.3471918664877214e-07, | |
| "logits/chosen": -0.35498046875, | |
| "logits/rejected": -0.3848876953125, | |
| "logps/chosen": -668.5, | |
| "logps/rejected": -758.5, | |
| "loss": 0.4765, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.39453125, | |
| "rewards/margins": 1.04248046875, | |
| "rewards/rejected": -4.439453125, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.64768, | |
| "grad_norm": 16.89908454302761, | |
| "learning_rate": 2.3324142615635527e-07, | |
| "logits/chosen": -0.355712890625, | |
| "logits/rejected": -0.3896484375, | |
| "logps/chosen": -676.0, | |
| "logps/rejected": -796.0, | |
| "loss": 0.3675, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -3.345703125, | |
| "rewards/margins": 1.384765625, | |
| "rewards/rejected": -4.73046875, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.64896, | |
| "grad_norm": 9.037083694795681, | |
| "learning_rate": 2.317660040358085e-07, | |
| "logits/chosen": -0.2799072265625, | |
| "logits/rejected": -0.35302734375, | |
| "logps/chosen": -653.5, | |
| "logps/rejected": -813.5, | |
| "loss": 0.4096, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.427734375, | |
| "rewards/margins": 1.4033203125, | |
| "rewards/rejected": -4.83203125, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.65024, | |
| "grad_norm": 13.711223830769715, | |
| "learning_rate": 2.3029294983601597e-07, | |
| "logits/chosen": -0.3631591796875, | |
| "logits/rejected": -0.3857421875, | |
| "logps/chosen": -639.0, | |
| "logps/rejected": -715.0, | |
| "loss": 0.5367, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -3.181640625, | |
| "rewards/margins": 0.947265625, | |
| "rewards/rejected": -4.130859375, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.65152, | |
| "grad_norm": 15.850008639937514, | |
| "learning_rate": 2.2882229305843867e-07, | |
| "logits/chosen": -0.31689453125, | |
| "logits/rejected": -0.3739013671875, | |
| "logps/chosen": -603.0, | |
| "logps/rejected": -715.0, | |
| "loss": 0.4067, | |
| "rewards/accuracies": 0.8671875, | |
| "rewards/chosen": -3.0390625, | |
| "rewards/margins": 1.18798828125, | |
| "rewards/rejected": -4.22265625, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.6528, | |
| "grad_norm": 13.27611006512659, | |
| "learning_rate": 2.2735406315652325e-07, | |
| "logits/chosen": -0.3470458984375, | |
| "logits/rejected": -0.40087890625, | |
| "logps/chosen": -654.5, | |
| "logps/rejected": -778.5, | |
| "loss": 0.4238, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -3.30078125, | |
| "rewards/margins": 1.177734375, | |
| "rewards/rejected": -4.47265625, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.65408, | |
| "grad_norm": 9.557828693226199, | |
| "learning_rate": 2.2588828953511252e-07, | |
| "logits/chosen": -0.250823974609375, | |
| "logits/rejected": -0.336669921875, | |
| "logps/chosen": -663.0, | |
| "logps/rejected": -774.0, | |
| "loss": 0.513, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.5625, | |
| "rewards/margins": 1.05712890625, | |
| "rewards/rejected": -4.625, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.65536, | |
| "grad_norm": 15.537086503024735, | |
| "learning_rate": 2.2442500154985642e-07, | |
| "logits/chosen": -0.236083984375, | |
| "logits/rejected": -0.26043701171875, | |
| "logps/chosen": -640.5, | |
| "logps/rejected": -765.5, | |
| "loss": 0.5179, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.447265625, | |
| "rewards/margins": 1.009765625, | |
| "rewards/rejected": -4.458984375, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.65664, | |
| "grad_norm": 8.84797363334171, | |
| "learning_rate": 2.229642285066236e-07, | |
| "logits/chosen": -0.33831787109375, | |
| "logits/rejected": -0.3756103515625, | |
| "logps/chosen": -658.0, | |
| "logps/rejected": -813.0, | |
| "loss": 0.3957, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.5234375, | |
| "rewards/margins": 1.4072265625, | |
| "rewards/rejected": -4.931640625, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.65792, | |
| "grad_norm": 8.898032465683093, | |
| "learning_rate": 2.2150599966091535e-07, | |
| "logits/chosen": -0.23968505859375, | |
| "logits/rejected": -0.240509033203125, | |
| "logps/chosen": -670.0, | |
| "logps/rejected": -768.0, | |
| "loss": 0.4581, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.576171875, | |
| "rewards/margins": 1.08349609375, | |
| "rewards/rejected": -4.66015625, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.6592, | |
| "grad_norm": 9.301895530550201, | |
| "learning_rate": 2.200503442172792e-07, | |
| "logits/chosen": -0.2496337890625, | |
| "logits/rejected": -0.316650390625, | |
| "logps/chosen": -694.5, | |
| "logps/rejected": -817.0, | |
| "loss": 0.4339, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.78515625, | |
| "rewards/margins": 1.220703125, | |
| "rewards/rejected": -5.0, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.66048, | |
| "grad_norm": 9.315272518266339, | |
| "learning_rate": 2.1859729132872407e-07, | |
| "logits/chosen": -0.2623291015625, | |
| "logits/rejected": -0.2822265625, | |
| "logps/chosen": -610.25, | |
| "logps/rejected": -698.0, | |
| "loss": 0.5085, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.298828125, | |
| "rewards/margins": 1.0263671875, | |
| "rewards/rejected": -4.326171875, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.66176, | |
| "grad_norm": 8.846499820634536, | |
| "learning_rate": 2.171468700961363e-07, | |
| "logits/chosen": -0.243621826171875, | |
| "logits/rejected": -0.24200439453125, | |
| "logps/chosen": -702.5, | |
| "logps/rejected": -804.0, | |
| "loss": 0.4638, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.7265625, | |
| "rewards/margins": 1.20751953125, | |
| "rewards/rejected": -4.931640625, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.66304, | |
| "grad_norm": 19.450404147912273, | |
| "learning_rate": 2.1569910956769707e-07, | |
| "logits/chosen": -0.264404296875, | |
| "logits/rejected": -0.3031005859375, | |
| "logps/chosen": -731.0, | |
| "logps/rejected": -857.0, | |
| "loss": 0.3519, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.779296875, | |
| "rewards/margins": 1.4775390625, | |
| "rewards/rejected": -5.25390625, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.66432, | |
| "grad_norm": 11.9383597079162, | |
| "learning_rate": 2.1425403873830082e-07, | |
| "logits/chosen": -0.267974853515625, | |
| "logits/rejected": -0.300872802734375, | |
| "logps/chosen": -758.5, | |
| "logps/rejected": -859.5, | |
| "loss": 0.4013, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -4.015625, | |
| "rewards/margins": 1.35791015625, | |
| "rewards/rejected": -5.37109375, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.6656, | |
| "grad_norm": 9.054749925855036, | |
| "learning_rate": 2.1281168654897377e-07, | |
| "logits/chosen": -0.24322509765625, | |
| "logits/rejected": -0.27294921875, | |
| "logps/chosen": -732.0, | |
| "logps/rejected": -846.0, | |
| "loss": 0.4282, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.001953125, | |
| "rewards/margins": 1.291015625, | |
| "rewards/rejected": -5.29296875, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.66688, | |
| "grad_norm": 17.341035886669818, | |
| "learning_rate": 2.113720818862951e-07, | |
| "logits/chosen": -0.206298828125, | |
| "logits/rejected": -0.20355224609375, | |
| "logps/chosen": -724.0, | |
| "logps/rejected": -787.0, | |
| "loss": 0.5664, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -3.859375, | |
| "rewards/margins": 0.95849609375, | |
| "rewards/rejected": -4.81640625, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.66816, | |
| "grad_norm": 9.905351905849056, | |
| "learning_rate": 2.0993525358181822e-07, | |
| "logits/chosen": -0.275146484375, | |
| "logits/rejected": -0.3148193359375, | |
| "logps/chosen": -760.0, | |
| "logps/rejected": -883.0, | |
| "loss": 0.4874, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.158203125, | |
| "rewards/margins": 1.284912109375, | |
| "rewards/rejected": -5.453125, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.66944, | |
| "grad_norm": 11.092239401478107, | |
| "learning_rate": 2.085012304114933e-07, | |
| "logits/chosen": -0.150665283203125, | |
| "logits/rejected": -0.187652587890625, | |
| "logps/chosen": -728.5, | |
| "logps/rejected": -839.0, | |
| "loss": 0.4484, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.9375, | |
| "rewards/margins": 1.2138671875, | |
| "rewards/rejected": -5.14453125, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.67072, | |
| "grad_norm": 15.689220713255043, | |
| "learning_rate": 2.0707004109509057e-07, | |
| "logits/chosen": -0.125213623046875, | |
| "logits/rejected": -0.13275146484375, | |
| "logps/chosen": -734.5, | |
| "logps/rejected": -794.0, | |
| "loss": 0.5323, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -4.185546875, | |
| "rewards/margins": 1.04541015625, | |
| "rewards/rejected": -5.23828125, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 22.019953630911694, | |
| "learning_rate": 2.0564171429562586e-07, | |
| "logits/chosen": -0.18068695068359375, | |
| "logits/rejected": -0.2034912109375, | |
| "logps/chosen": -683.0, | |
| "logps/rejected": -821.5, | |
| "loss": 0.3464, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -3.689453125, | |
| "rewards/margins": 1.6474609375, | |
| "rewards/rejected": -5.328125, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.67328, | |
| "grad_norm": 9.269392815729315, | |
| "learning_rate": 2.042162786187862e-07, | |
| "logits/chosen": -0.1710357666015625, | |
| "logits/rejected": -0.20377349853515625, | |
| "logps/chosen": -755.5, | |
| "logps/rejected": -875.5, | |
| "loss": 0.4172, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -4.275390625, | |
| "rewards/margins": 1.36376953125, | |
| "rewards/rejected": -5.63671875, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.67456, | |
| "grad_norm": 18.30397649803818, | |
| "learning_rate": 2.027937626123565e-07, | |
| "logits/chosen": -0.2088623046875, | |
| "logits/rejected": -0.22777557373046875, | |
| "logps/chosen": -816.0, | |
| "logps/rejected": -960.0, | |
| "loss": 0.4332, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.57421875, | |
| "rewards/margins": 1.33251953125, | |
| "rewards/rejected": -5.91015625, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.67584, | |
| "grad_norm": 10.323921457948874, | |
| "learning_rate": 2.0137419476564897e-07, | |
| "logits/chosen": -0.12468719482421875, | |
| "logits/rejected": -0.16400146484375, | |
| "logps/chosen": -781.0, | |
| "logps/rejected": -894.0, | |
| "loss": 0.4222, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.625, | |
| "rewards/margins": 1.38525390625, | |
| "rewards/rejected": -6.00390625, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.67712, | |
| "grad_norm": 19.985890606151916, | |
| "learning_rate": 1.9995760350893097e-07, | |
| "logits/chosen": -0.15547943115234375, | |
| "logits/rejected": -0.218109130859375, | |
| "logps/chosen": -795.5, | |
| "logps/rejected": -912.0, | |
| "loss": 0.4694, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.646484375, | |
| "rewards/margins": 1.234375, | |
| "rewards/rejected": -5.88671875, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.6784, | |
| "grad_norm": 20.81916495287454, | |
| "learning_rate": 1.985440172128573e-07, | |
| "logits/chosen": -0.15631103515625, | |
| "logits/rejected": -0.201385498046875, | |
| "logps/chosen": -786.0, | |
| "logps/rejected": -920.5, | |
| "loss": 0.3556, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -4.556640625, | |
| "rewards/margins": 1.4970703125, | |
| "rewards/rejected": -6.05859375, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.67968, | |
| "grad_norm": 11.921218446157969, | |
| "learning_rate": 1.9713346418790056e-07, | |
| "logits/chosen": -0.126007080078125, | |
| "logits/rejected": -0.188629150390625, | |
| "logps/chosen": -756.5, | |
| "logps/rejected": -899.0, | |
| "loss": 0.4957, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.63671875, | |
| "rewards/margins": 1.23779296875, | |
| "rewards/rejected": -5.87109375, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.68096, | |
| "grad_norm": 11.545371418939542, | |
| "learning_rate": 1.957259726837849e-07, | |
| "logits/chosen": -0.177947998046875, | |
| "logits/rejected": -0.2209320068359375, | |
| "logps/chosen": -775.0, | |
| "logps/rejected": -927.0, | |
| "loss": 0.4769, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.494140625, | |
| "rewards/margins": 1.47119140625, | |
| "rewards/rejected": -5.97265625, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.68224, | |
| "grad_norm": 19.85526612585022, | |
| "learning_rate": 1.9432157088892065e-07, | |
| "logits/chosen": -0.1734619140625, | |
| "logits/rejected": -0.236083984375, | |
| "logps/chosen": -811.5, | |
| "logps/rejected": -915.0, | |
| "loss": 0.4809, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.828125, | |
| "rewards/margins": 1.300048828125, | |
| "rewards/rejected": -6.1328125, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.68352, | |
| "grad_norm": 35.982005117679044, | |
| "learning_rate": 1.9292028692983824e-07, | |
| "logits/chosen": -0.162567138671875, | |
| "logits/rejected": -0.21380615234375, | |
| "logps/chosen": -779.0, | |
| "logps/rejected": -879.5, | |
| "loss": 0.5361, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.6953125, | |
| "rewards/margins": 1.1611328125, | |
| "rewards/rejected": -5.8515625, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6848, | |
| "grad_norm": 9.401343460064473, | |
| "learning_rate": 1.9152214887062702e-07, | |
| "logits/chosen": -0.18548583984375, | |
| "logits/rejected": -0.222076416015625, | |
| "logps/chosen": -784.0, | |
| "logps/rejected": -911.5, | |
| "loss": 0.4402, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -4.724609375, | |
| "rewards/margins": 1.4794921875, | |
| "rewards/rejected": -6.19921875, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.68608, | |
| "grad_norm": 19.636202317515714, | |
| "learning_rate": 1.9012718471237144e-07, | |
| "logits/chosen": -0.20050048828125, | |
| "logits/rejected": -0.25152587890625, | |
| "logps/chosen": -883.5, | |
| "logps/rejected": -995.0, | |
| "loss": 0.4611, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -5.19140625, | |
| "rewards/margins": 1.3603515625, | |
| "rewards/rejected": -6.55078125, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.68736, | |
| "grad_norm": 9.609636299261897, | |
| "learning_rate": 1.8873542239259109e-07, | |
| "logits/chosen": -0.13946533203125, | |
| "logits/rejected": -0.18389892578125, | |
| "logps/chosen": -783.5, | |
| "logps/rejected": -957.0, | |
| "loss": 0.4048, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.6484375, | |
| "rewards/margins": 1.47802734375, | |
| "rewards/rejected": -6.125, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.68864, | |
| "grad_norm": 28.14830330305055, | |
| "learning_rate": 1.8734688978468098e-07, | |
| "logits/chosen": -0.23590087890625, | |
| "logits/rejected": -0.26971435546875, | |
| "logps/chosen": -811.0, | |
| "logps/rejected": -912.0, | |
| "loss": 0.5124, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.46875, | |
| "rewards/margins": 1.19091796875, | |
| "rewards/rejected": -5.66015625, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.68992, | |
| "grad_norm": 17.73425969488332, | |
| "learning_rate": 1.8596161469735374e-07, | |
| "logits/chosen": -0.2036590576171875, | |
| "logits/rejected": -0.2774658203125, | |
| "logps/chosen": -808.0, | |
| "logps/rejected": -948.0, | |
| "loss": 0.4463, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -4.76953125, | |
| "rewards/margins": 1.41064453125, | |
| "rewards/rejected": -6.17578125, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.6912, | |
| "grad_norm": 25.111899072723798, | |
| "learning_rate": 1.8457962487408174e-07, | |
| "logits/chosen": -0.12348175048828125, | |
| "logits/rejected": -0.16607093811035156, | |
| "logps/chosen": -771.0, | |
| "logps/rejected": -857.0, | |
| "loss": 0.5007, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.49609375, | |
| "rewards/margins": 1.124267578125, | |
| "rewards/rejected": -5.6171875, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.69248, | |
| "grad_norm": 12.189044823657042, | |
| "learning_rate": 1.8320094799254222e-07, | |
| "logits/chosen": -0.20697021484375, | |
| "logits/rejected": -0.2476806640625, | |
| "logps/chosen": -793.0, | |
| "logps/rejected": -945.5, | |
| "loss": 0.4448, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.625, | |
| "rewards/margins": 1.31787109375, | |
| "rewards/rejected": -5.9453125, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.69376, | |
| "grad_norm": 19.576872251438612, | |
| "learning_rate": 1.8182561166406308e-07, | |
| "logits/chosen": -0.22357177734375, | |
| "logits/rejected": -0.2642822265625, | |
| "logps/chosen": -749.5, | |
| "logps/rejected": -835.0, | |
| "loss": 0.5487, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.265625, | |
| "rewards/margins": 0.9267578125, | |
| "rewards/rejected": -5.1953125, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.69504, | |
| "grad_norm": 9.991255716215326, | |
| "learning_rate": 1.8045364343306914e-07, | |
| "logits/chosen": -0.1717987060546875, | |
| "logits/rejected": -0.21246337890625, | |
| "logps/chosen": -731.5, | |
| "logps/rejected": -863.5, | |
| "loss": 0.4698, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.220703125, | |
| "rewards/margins": 1.2138671875, | |
| "rewards/rejected": -5.4375, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.69632, | |
| "grad_norm": 9.256546508375417, | |
| "learning_rate": 1.7908507077653123e-07, | |
| "logits/chosen": -0.220703125, | |
| "logits/rejected": -0.2713623046875, | |
| "logps/chosen": -718.5, | |
| "logps/rejected": -876.0, | |
| "loss": 0.412, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.1015625, | |
| "rewards/margins": 1.4931640625, | |
| "rewards/rejected": -5.59375, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6976, | |
| "grad_norm": 10.085774446505868, | |
| "learning_rate": 1.7771992110341532e-07, | |
| "logits/chosen": -0.181396484375, | |
| "logits/rejected": -0.2060546875, | |
| "logps/chosen": -746.0, | |
| "logps/rejected": -893.0, | |
| "loss": 0.4511, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -3.97265625, | |
| "rewards/margins": 1.49462890625, | |
| "rewards/rejected": -5.46875, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.69888, | |
| "grad_norm": 10.969992056433007, | |
| "learning_rate": 1.7635822175413445e-07, | |
| "logits/chosen": -0.284912109375, | |
| "logits/rejected": -0.3399658203125, | |
| "logps/chosen": -665.0, | |
| "logps/rejected": -803.0, | |
| "loss": 0.4535, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.654296875, | |
| "rewards/margins": 1.290283203125, | |
| "rewards/rejected": -4.94140625, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.70016, | |
| "grad_norm": 18.256669397800174, | |
| "learning_rate": 1.7500000000000007e-07, | |
| "logits/chosen": -0.3035888671875, | |
| "logits/rejected": -0.346923828125, | |
| "logps/chosen": -667.0, | |
| "logps/rejected": -792.0, | |
| "loss": 0.3832, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -3.58984375, | |
| "rewards/margins": 1.271484375, | |
| "rewards/rejected": -4.86328125, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.70144, | |
| "grad_norm": 12.455904821192838, | |
| "learning_rate": 1.7364528304267644e-07, | |
| "logits/chosen": -0.2991943359375, | |
| "logits/rejected": -0.333740234375, | |
| "logps/chosen": -641.5, | |
| "logps/rejected": -753.0, | |
| "loss": 0.437, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.29296875, | |
| "rewards/margins": 1.140625, | |
| "rewards/rejected": -4.435546875, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.70272, | |
| "grad_norm": 9.000099856418279, | |
| "learning_rate": 1.7229409801363634e-07, | |
| "logits/chosen": -0.29541015625, | |
| "logits/rejected": -0.36572265625, | |
| "logps/chosen": -667.5, | |
| "logps/rejected": -761.5, | |
| "loss": 0.4534, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.552734375, | |
| "rewards/margins": 1.12939453125, | |
| "rewards/rejected": -4.68359375, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 9.631183638602456, | |
| "learning_rate": 1.7094647197361656e-07, | |
| "logits/chosen": -0.322052001953125, | |
| "logits/rejected": -0.3463134765625, | |
| "logps/chosen": -696.5, | |
| "logps/rejected": -818.0, | |
| "loss": 0.4881, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.470703125, | |
| "rewards/margins": 1.14697265625, | |
| "rewards/rejected": -4.615234375, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.70528, | |
| "grad_norm": 10.011321742256536, | |
| "learning_rate": 1.6960243191207686e-07, | |
| "logits/chosen": -0.296875, | |
| "logits/rejected": -0.35693359375, | |
| "logps/chosen": -691.5, | |
| "logps/rejected": -786.0, | |
| "loss": 0.4621, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.609375, | |
| "rewards/margins": 1.216796875, | |
| "rewards/rejected": -4.8203125, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.70656, | |
| "grad_norm": 13.798592755194116, | |
| "learning_rate": 1.682620047466589e-07, | |
| "logits/chosen": -0.35546875, | |
| "logits/rejected": -0.391357421875, | |
| "logps/chosen": -674.0, | |
| "logps/rejected": -765.5, | |
| "loss": 0.4362, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.546875, | |
| "rewards/margins": 1.2548828125, | |
| "rewards/rejected": -4.80078125, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.70784, | |
| "grad_norm": 8.22483168916061, | |
| "learning_rate": 1.6692521732264789e-07, | |
| "logits/chosen": -0.32275390625, | |
| "logits/rejected": -0.35107421875, | |
| "logps/chosen": -708.0, | |
| "logps/rejected": -820.0, | |
| "loss": 0.4363, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.662109375, | |
| "rewards/margins": 1.157958984375, | |
| "rewards/rejected": -4.82421875, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.70912, | |
| "grad_norm": 11.12423189134255, | |
| "learning_rate": 1.655920964124339e-07, | |
| "logits/chosen": -0.30712890625, | |
| "logits/rejected": -0.342376708984375, | |
| "logps/chosen": -666.5, | |
| "logps/rejected": -764.5, | |
| "loss": 0.4255, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.408203125, | |
| "rewards/margins": 1.1845703125, | |
| "rewards/rejected": -4.595703125, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.7104, | |
| "grad_norm": 9.470624413639532, | |
| "learning_rate": 1.642626687149765e-07, | |
| "logits/chosen": -0.298095703125, | |
| "logits/rejected": -0.3577880859375, | |
| "logps/chosen": -672.0, | |
| "logps/rejected": -760.0, | |
| "loss": 0.4545, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.388671875, | |
| "rewards/margins": 1.16259765625, | |
| "rewards/rejected": -4.5546875, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.71168, | |
| "grad_norm": 10.343750237066667, | |
| "learning_rate": 1.629369608552696e-07, | |
| "logits/chosen": -0.3017578125, | |
| "logits/rejected": -0.359619140625, | |
| "logps/chosen": -703.5, | |
| "logps/rejected": -814.5, | |
| "loss": 0.4353, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.642578125, | |
| "rewards/margins": 1.1728515625, | |
| "rewards/rejected": -4.81640625, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.71296, | |
| "grad_norm": 10.683608221205487, | |
| "learning_rate": 1.6161499938380874e-07, | |
| "logits/chosen": -0.3658447265625, | |
| "logits/rejected": -0.42822265625, | |
| "logps/chosen": -701.5, | |
| "logps/rejected": -829.0, | |
| "loss": 0.4438, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.548828125, | |
| "rewards/margins": 1.4580078125, | |
| "rewards/rejected": -5.005859375, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.71424, | |
| "grad_norm": 9.927387665144652, | |
| "learning_rate": 1.6029681077605865e-07, | |
| "logits/chosen": -0.347412109375, | |
| "logits/rejected": -0.39990234375, | |
| "logps/chosen": -686.5, | |
| "logps/rejected": -856.5, | |
| "loss": 0.4152, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.654296875, | |
| "rewards/margins": 1.3212890625, | |
| "rewards/rejected": -4.9765625, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.71552, | |
| "grad_norm": 8.62349737887557, | |
| "learning_rate": 1.5898242143192336e-07, | |
| "logits/chosen": -0.307373046875, | |
| "logits/rejected": -0.3692626953125, | |
| "logps/chosen": -644.5, | |
| "logps/rejected": -791.5, | |
| "loss": 0.3838, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.294921875, | |
| "rewards/margins": 1.3662109375, | |
| "rewards/rejected": -4.66796875, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.7168, | |
| "grad_norm": 20.08172331534478, | |
| "learning_rate": 1.576718576752179e-07, | |
| "logits/chosen": -0.25872802734375, | |
| "logits/rejected": -0.3118896484375, | |
| "logps/chosen": -621.0, | |
| "logps/rejected": -742.5, | |
| "loss": 0.4035, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.26171875, | |
| "rewards/margins": 1.31884765625, | |
| "rewards/rejected": -4.58203125, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.71808, | |
| "grad_norm": 9.964788088459969, | |
| "learning_rate": 1.5636514575314023e-07, | |
| "logits/chosen": -0.3486328125, | |
| "logits/rejected": -0.4261474609375, | |
| "logps/chosen": -727.0, | |
| "logps/rejected": -832.0, | |
| "loss": 0.5178, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.638671875, | |
| "rewards/margins": 1.04345703125, | |
| "rewards/rejected": -4.6796875, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.71936, | |
| "grad_norm": 8.246036203914949, | |
| "learning_rate": 1.550623118357463e-07, | |
| "logits/chosen": -0.261474609375, | |
| "logits/rejected": -0.327880859375, | |
| "logps/chosen": -699.0, | |
| "logps/rejected": -844.0, | |
| "loss": 0.4117, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -3.875, | |
| "rewards/margins": 1.34619140625, | |
| "rewards/rejected": -5.22265625, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.72064, | |
| "grad_norm": 9.24571549890142, | |
| "learning_rate": 1.5376338201542535e-07, | |
| "logits/chosen": -0.3028564453125, | |
| "logits/rejected": -0.3468017578125, | |
| "logps/chosen": -667.0, | |
| "logps/rejected": -782.0, | |
| "loss": 0.4665, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -3.62109375, | |
| "rewards/margins": 1.04150390625, | |
| "rewards/rejected": -4.66015625, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.72192, | |
| "grad_norm": 11.314423673740029, | |
| "learning_rate": 1.524683823063783e-07, | |
| "logits/chosen": -0.33697509765625, | |
| "logits/rejected": -0.35467529296875, | |
| "logps/chosen": -703.0, | |
| "logps/rejected": -758.5, | |
| "loss": 0.5141, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.708984375, | |
| "rewards/margins": 1.0166015625, | |
| "rewards/rejected": -4.73046875, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.7232, | |
| "grad_norm": 8.185307525881214, | |
| "learning_rate": 1.5117733864409549e-07, | |
| "logits/chosen": -0.247314453125, | |
| "logits/rejected": -0.314208984375, | |
| "logps/chosen": -650.5, | |
| "logps/rejected": -819.5, | |
| "loss": 0.3748, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.607421875, | |
| "rewards/margins": 1.447265625, | |
| "rewards/rejected": -5.05859375, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.72448, | |
| "grad_norm": 18.20769264970902, | |
| "learning_rate": 1.4989027688483806e-07, | |
| "logits/chosen": -0.311767578125, | |
| "logits/rejected": -0.364501953125, | |
| "logps/chosen": -695.5, | |
| "logps/rejected": -808.0, | |
| "loss": 0.4326, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.751953125, | |
| "rewards/margins": 1.30908203125, | |
| "rewards/rejected": -5.06640625, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.72576, | |
| "grad_norm": 8.80192874409478, | |
| "learning_rate": 1.4860722280512023e-07, | |
| "logits/chosen": -0.2942657470703125, | |
| "logits/rejected": -0.33642578125, | |
| "logps/chosen": -649.5, | |
| "logps/rejected": -767.0, | |
| "loss": 0.4278, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.607421875, | |
| "rewards/margins": 1.18017578125, | |
| "rewards/rejected": -4.78515625, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.72704, | |
| "grad_norm": 10.943134384510522, | |
| "learning_rate": 1.4732820210119238e-07, | |
| "logits/chosen": -0.3031005859375, | |
| "logits/rejected": -0.3203125, | |
| "logps/chosen": -734.0, | |
| "logps/rejected": -838.0, | |
| "loss": 0.3886, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.98828125, | |
| "rewards/margins": 1.3251953125, | |
| "rewards/rejected": -5.3125, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.72832, | |
| "grad_norm": 11.85355005830973, | |
| "learning_rate": 1.4605324038852707e-07, | |
| "logits/chosen": -0.20941162109375, | |
| "logits/rejected": -0.2779541015625, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -806.5, | |
| "loss": 0.4069, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -3.740234375, | |
| "rewards/margins": 1.44677734375, | |
| "rewards/rejected": -5.1875, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.7296, | |
| "grad_norm": 8.286237978866287, | |
| "learning_rate": 1.4478236320130553e-07, | |
| "logits/chosen": -0.217041015625, | |
| "logits/rejected": -0.267822265625, | |
| "logps/chosen": -680.0, | |
| "logps/rejected": -779.0, | |
| "loss": 0.4393, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.6484375, | |
| "rewards/margins": 1.163330078125, | |
| "rewards/rejected": -4.8125, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.73088, | |
| "grad_norm": 13.68132204214939, | |
| "learning_rate": 1.4351559599190707e-07, | |
| "logits/chosen": -0.17962646484375, | |
| "logits/rejected": -0.239990234375, | |
| "logps/chosen": -720.0, | |
| "logps/rejected": -853.0, | |
| "loss": 0.4864, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.06640625, | |
| "rewards/margins": 1.2568359375, | |
| "rewards/rejected": -5.32421875, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.73216, | |
| "grad_norm": 9.213667928480618, | |
| "learning_rate": 1.4225296413039794e-07, | |
| "logits/chosen": -0.20670700073242188, | |
| "logits/rejected": -0.26763916015625, | |
| "logps/chosen": -705.0, | |
| "logps/rejected": -866.0, | |
| "loss": 0.4484, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.021484375, | |
| "rewards/margins": 1.3505859375, | |
| "rewards/rejected": -5.37109375, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.73344, | |
| "grad_norm": 9.155271393542527, | |
| "learning_rate": 1.409944929040249e-07, | |
| "logits/chosen": -0.188690185546875, | |
| "logits/rejected": -0.23065185546875, | |
| "logps/chosen": -703.5, | |
| "logps/rejected": -821.5, | |
| "loss": 0.4569, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.984375, | |
| "rewards/margins": 1.224609375, | |
| "rewards/rejected": -5.21484375, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.73472, | |
| "grad_norm": 12.402920193014143, | |
| "learning_rate": 1.3974020751670732e-07, | |
| "logits/chosen": -0.237548828125, | |
| "logits/rejected": -0.302734375, | |
| "logps/chosen": -666.0, | |
| "logps/rejected": -824.5, | |
| "loss": 0.4361, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.7734375, | |
| "rewards/margins": 1.27490234375, | |
| "rewards/rejected": -5.048828125, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 13.672795096747066, | |
| "learning_rate": 1.3849013308853368e-07, | |
| "logits/chosen": -0.1479034423828125, | |
| "logits/rejected": -0.21832275390625, | |
| "logps/chosen": -779.0, | |
| "logps/rejected": -881.5, | |
| "loss": 0.4502, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.23828125, | |
| "rewards/margins": 1.22607421875, | |
| "rewards/rejected": -5.4609375, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.73728, | |
| "grad_norm": 9.971405880324383, | |
| "learning_rate": 1.3724429465525732e-07, | |
| "logits/chosen": -0.1641998291015625, | |
| "logits/rejected": -0.186248779296875, | |
| "logps/chosen": -713.0, | |
| "logps/rejected": -790.0, | |
| "loss": 0.499, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.955078125, | |
| "rewards/margins": 1.1162109375, | |
| "rewards/rejected": -5.06640625, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.73856, | |
| "grad_norm": 8.772600149464196, | |
| "learning_rate": 1.360027171677957e-07, | |
| "logits/chosen": -0.2245025634765625, | |
| "logits/rejected": -0.2720947265625, | |
| "logps/chosen": -705.0, | |
| "logps/rejected": -841.0, | |
| "loss": 0.4277, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.859375, | |
| "rewards/margins": 1.580078125, | |
| "rewards/rejected": -5.4375, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.73984, | |
| "grad_norm": 18.81863742515878, | |
| "learning_rate": 1.3476542549173096e-07, | |
| "logits/chosen": -0.2587890625, | |
| "logits/rejected": -0.3282470703125, | |
| "logps/chosen": -762.5, | |
| "logps/rejected": -856.0, | |
| "loss": 0.5151, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.23828125, | |
| "rewards/margins": 1.15771484375, | |
| "rewards/rejected": -5.39453125, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.74112, | |
| "grad_norm": 9.936151878622386, | |
| "learning_rate": 1.335324444068108e-07, | |
| "logits/chosen": -0.2752685546875, | |
| "logits/rejected": -0.334228515625, | |
| "logps/chosen": -753.0, | |
| "logps/rejected": -913.0, | |
| "loss": 0.3748, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -4.279296875, | |
| "rewards/margins": 1.607421875, | |
| "rewards/rejected": -5.890625, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.7424, | |
| "grad_norm": 14.86753917739169, | |
| "learning_rate": 1.3230379860645365e-07, | |
| "logits/chosen": -0.25177001953125, | |
| "logits/rejected": -0.300537109375, | |
| "logps/chosen": -718.0, | |
| "logps/rejected": -872.0, | |
| "loss": 0.3886, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -4.125, | |
| "rewards/margins": 1.49072265625, | |
| "rewards/rejected": -5.625, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.74368, | |
| "grad_norm": 14.0480837535775, | |
| "learning_rate": 1.3107951269725286e-07, | |
| "logits/chosen": -0.1736297607421875, | |
| "logits/rejected": -0.22271728515625, | |
| "logps/chosen": -788.5, | |
| "logps/rejected": -911.5, | |
| "loss": 0.4652, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.408203125, | |
| "rewards/margins": 1.3837890625, | |
| "rewards/rejected": -5.8046875, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.74496, | |
| "grad_norm": 9.252489484832754, | |
| "learning_rate": 1.2985961119848506e-07, | |
| "logits/chosen": -0.15355682373046875, | |
| "logits/rejected": -0.20587158203125, | |
| "logps/chosen": -729.5, | |
| "logps/rejected": -853.0, | |
| "loss": 0.4643, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.099609375, | |
| "rewards/margins": 1.3515625, | |
| "rewards/rejected": -5.453125, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.74624, | |
| "grad_norm": 18.79861359200076, | |
| "learning_rate": 1.28644118541618e-07, | |
| "logits/chosen": -0.179351806640625, | |
| "logits/rejected": -0.22210693359375, | |
| "logps/chosen": -792.0, | |
| "logps/rejected": -912.0, | |
| "loss": 0.5022, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -4.345703125, | |
| "rewards/margins": 1.33837890625, | |
| "rewards/rejected": -5.68359375, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.74752, | |
| "grad_norm": 9.556413731103051, | |
| "learning_rate": 1.2743305906982183e-07, | |
| "logits/chosen": -0.142608642578125, | |
| "logits/rejected": -0.1815185546875, | |
| "logps/chosen": -772.0, | |
| "logps/rejected": -916.0, | |
| "loss": 0.4378, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.3984375, | |
| "rewards/margins": 1.37841796875, | |
| "rewards/rejected": -5.7734375, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7488, | |
| "grad_norm": 10.524837392255892, | |
| "learning_rate": 1.2622645703748163e-07, | |
| "logits/chosen": -0.0958251953125, | |
| "logits/rejected": -0.1723480224609375, | |
| "logps/chosen": -704.5, | |
| "logps/rejected": -864.5, | |
| "loss": 0.4003, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -4.115234375, | |
| "rewards/margins": 1.41064453125, | |
| "rewards/rejected": -5.5234375, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.75008, | |
| "grad_norm": 13.312445253403741, | |
| "learning_rate": 1.2502433660971123e-07, | |
| "logits/chosen": -0.19342041015625, | |
| "logits/rejected": -0.23333740234375, | |
| "logps/chosen": -759.0, | |
| "logps/rejected": -883.0, | |
| "loss": 0.3936, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -4.291015625, | |
| "rewards/margins": 1.40185546875, | |
| "rewards/rejected": -5.69921875, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.75136, | |
| "grad_norm": 8.960818660553267, | |
| "learning_rate": 1.2382672186187003e-07, | |
| "logits/chosen": -0.2032470703125, | |
| "logits/rejected": -0.2041015625, | |
| "logps/chosen": -725.5, | |
| "logps/rejected": -849.0, | |
| "loss": 0.3993, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.953125, | |
| "rewards/margins": 1.3955078125, | |
| "rewards/rejected": -5.3515625, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.75264, | |
| "grad_norm": 10.646689306325051, | |
| "learning_rate": 1.2263363677907974e-07, | |
| "logits/chosen": -0.123687744140625, | |
| "logits/rejected": -0.1717071533203125, | |
| "logps/chosen": -723.0, | |
| "logps/rejected": -856.5, | |
| "loss": 0.4223, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.0390625, | |
| "rewards/margins": 1.4228515625, | |
| "rewards/rejected": -5.4609375, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.75392, | |
| "grad_norm": 12.393571858690176, | |
| "learning_rate": 1.214451052557453e-07, | |
| "logits/chosen": -0.177520751953125, | |
| "logits/rejected": -0.19673919677734375, | |
| "logps/chosen": -767.0, | |
| "logps/rejected": -846.5, | |
| "loss": 0.5177, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.318359375, | |
| "rewards/margins": 0.98583984375, | |
| "rewards/rejected": -5.30078125, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.7552, | |
| "grad_norm": 22.161788073197705, | |
| "learning_rate": 1.202611510950747e-07, | |
| "logits/chosen": -0.08966064453125, | |
| "logits/rejected": -0.11224365234375, | |
| "logps/chosen": -767.5, | |
| "logps/rejected": -872.0, | |
| "loss": 0.5003, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.390625, | |
| "rewards/margins": 1.14306640625, | |
| "rewards/rejected": -5.53515625, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.75648, | |
| "grad_norm": 12.260254703657399, | |
| "learning_rate": 1.1908179800860415e-07, | |
| "logits/chosen": -0.0744476318359375, | |
| "logits/rejected": -0.13702392578125, | |
| "logps/chosen": -723.0, | |
| "logps/rejected": -851.5, | |
| "loss": 0.4506, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.1640625, | |
| "rewards/margins": 1.28857421875, | |
| "rewards/rejected": -5.4453125, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.75776, | |
| "grad_norm": 9.937355029365806, | |
| "learning_rate": 1.1790706961572176e-07, | |
| "logits/chosen": -0.20186614990234375, | |
| "logits/rejected": -0.202545166015625, | |
| "logps/chosen": -788.0, | |
| "logps/rejected": -897.5, | |
| "loss": 0.4231, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -4.359375, | |
| "rewards/margins": 1.36572265625, | |
| "rewards/rejected": -5.71875, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.75904, | |
| "grad_norm": 10.470389968279672, | |
| "learning_rate": 1.1673698944319491e-07, | |
| "logits/chosen": -0.11907958984375, | |
| "logits/rejected": -0.17713165283203125, | |
| "logps/chosen": -760.5, | |
| "logps/rejected": -864.5, | |
| "loss": 0.4464, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.271484375, | |
| "rewards/margins": 1.3154296875, | |
| "rewards/rejected": -5.58984375, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.76032, | |
| "grad_norm": 25.904113311082135, | |
| "learning_rate": 1.1557158092469967e-07, | |
| "logits/chosen": -0.13934326171875, | |
| "logits/rejected": -0.1776123046875, | |
| "logps/chosen": -762.0, | |
| "logps/rejected": -914.5, | |
| "loss": 0.5208, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.568359375, | |
| "rewards/margins": 1.5400390625, | |
| "rewards/rejected": -6.109375, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7616, | |
| "grad_norm": 14.910481470992487, | |
| "learning_rate": 1.1441086740035036e-07, | |
| "logits/chosen": -0.197296142578125, | |
| "logits/rejected": -0.231689453125, | |
| "logps/chosen": -748.5, | |
| "logps/rejected": -863.0, | |
| "loss": 0.3905, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -4.0625, | |
| "rewards/margins": 1.39453125, | |
| "rewards/rejected": -5.4609375, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.76288, | |
| "grad_norm": 10.59387700561826, | |
| "learning_rate": 1.1325487211623342e-07, | |
| "logits/chosen": -0.15478515625, | |
| "logits/rejected": -0.188751220703125, | |
| "logps/chosen": -709.0, | |
| "logps/rejected": -851.0, | |
| "loss": 0.4572, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.25390625, | |
| "rewards/margins": 1.296875, | |
| "rewards/rejected": -5.546875, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.76416, | |
| "grad_norm": 11.067694544255176, | |
| "learning_rate": 1.1210361822394029e-07, | |
| "logits/chosen": -0.2096710205078125, | |
| "logits/rejected": -0.2550048828125, | |
| "logps/chosen": -758.0, | |
| "logps/rejected": -878.5, | |
| "loss": 0.4847, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.25, | |
| "rewards/margins": 1.408203125, | |
| "rewards/rejected": -5.65234375, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.76544, | |
| "grad_norm": 45.69276578416623, | |
| "learning_rate": 1.1095712878010541e-07, | |
| "logits/chosen": -0.0606689453125, | |
| "logits/rejected": -0.1527099609375, | |
| "logps/chosen": -777.0, | |
| "logps/rejected": -876.0, | |
| "loss": 0.5514, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.46484375, | |
| "rewards/margins": 1.1143798828125, | |
| "rewards/rejected": -5.58203125, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.76672, | |
| "grad_norm": 12.679098112978194, | |
| "learning_rate": 1.0981542674594328e-07, | |
| "logits/chosen": -0.1817779541015625, | |
| "logits/rejected": -0.23337554931640625, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -842.5, | |
| "loss": 0.4295, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.791015625, | |
| "rewards/margins": 1.537109375, | |
| "rewards/rejected": -5.33203125, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 10.800303505547818, | |
| "learning_rate": 1.0867853498678901e-07, | |
| "logits/chosen": -0.273681640625, | |
| "logits/rejected": -0.293701171875, | |
| "logps/chosen": -705.0, | |
| "logps/rejected": -885.5, | |
| "loss": 0.4958, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.986328125, | |
| "rewards/margins": 1.387939453125, | |
| "rewards/rejected": -5.375, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "eval_logits/chosen": -0.19268035888671875, | |
| "eval_logits/rejected": -0.27252197265625, | |
| "eval_logps/chosen": -706.25, | |
| "eval_logps/rejected": -804.5, | |
| "eval_loss": 0.47617968916893005, | |
| "eval_rewards/accuracies": 0.740234375, | |
| "eval_rewards/chosen": -3.86328125, | |
| "eval_rewards/margins": 1.166748046875, | |
| "eval_rewards/rejected": -5.025390625, | |
| "eval_runtime": 27.9176, | |
| "eval_samples_per_second": 17.91, | |
| "eval_steps_per_second": 0.573, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.76928, | |
| "grad_norm": 9.048271943790935, | |
| "learning_rate": 1.0754647627164022e-07, | |
| "logits/chosen": -0.184814453125, | |
| "logits/rejected": -0.2506103515625, | |
| "logps/chosen": -703.5, | |
| "logps/rejected": -881.5, | |
| "loss": 0.3442, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.921875, | |
| "rewards/margins": 1.6376953125, | |
| "rewards/rejected": -5.5625, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.77056, | |
| "grad_norm": 26.0709495707422, | |
| "learning_rate": 1.064192732727016e-07, | |
| "logits/chosen": -0.19481658935546875, | |
| "logits/rejected": -0.232147216796875, | |
| "logps/chosen": -694.5, | |
| "logps/rejected": -756.0, | |
| "loss": 0.6118, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -3.634765625, | |
| "rewards/margins": 0.9560546875, | |
| "rewards/rejected": -4.59375, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.77184, | |
| "grad_norm": 9.88642766961704, | |
| "learning_rate": 1.0529694856493002e-07, | |
| "logits/chosen": -0.2410888671875, | |
| "logits/rejected": -0.287353515625, | |
| "logps/chosen": -764.0, | |
| "logps/rejected": -886.5, | |
| "loss": 0.4617, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.41015625, | |
| "rewards/margins": 1.09234619140625, | |
| "rewards/rejected": -5.50390625, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.77312, | |
| "grad_norm": 9.433226751910178, | |
| "learning_rate": 1.0417952462558286e-07, | |
| "logits/chosen": -0.21282958984375, | |
| "logits/rejected": -0.2587127685546875, | |
| "logps/chosen": -719.0, | |
| "logps/rejected": -844.5, | |
| "loss": 0.4325, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.951171875, | |
| "rewards/margins": 1.25732421875, | |
| "rewards/rejected": -5.2109375, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.7744, | |
| "grad_norm": 11.539221682126705, | |
| "learning_rate": 1.0306702383376813e-07, | |
| "logits/chosen": -0.20572662353515625, | |
| "logits/rejected": -0.2346649169921875, | |
| "logps/chosen": -729.0, | |
| "logps/rejected": -841.5, | |
| "loss": 0.4227, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -3.88671875, | |
| "rewards/margins": 1.31982421875, | |
| "rewards/rejected": -5.20703125, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.77568, | |
| "grad_norm": 9.610274231941142, | |
| "learning_rate": 1.0195946846999551e-07, | |
| "logits/chosen": -0.274658203125, | |
| "logits/rejected": -0.3345947265625, | |
| "logps/chosen": -688.5, | |
| "logps/rejected": -803.5, | |
| "loss": 0.4396, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.70703125, | |
| "rewards/margins": 1.340576171875, | |
| "rewards/rejected": -5.04296875, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.77696, | |
| "grad_norm": 9.567408589138397, | |
| "learning_rate": 1.0085688071573085e-07, | |
| "logits/chosen": -0.221038818359375, | |
| "logits/rejected": -0.2277069091796875, | |
| "logps/chosen": -713.0, | |
| "logps/rejected": -840.0, | |
| "loss": 0.4913, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.861328125, | |
| "rewards/margins": 1.158935546875, | |
| "rewards/rejected": -5.015625, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.77824, | |
| "grad_norm": 12.50027732304155, | |
| "learning_rate": 9.975928265295139e-08, | |
| "logits/chosen": -0.26611328125, | |
| "logits/rejected": -0.309814453125, | |
| "logps/chosen": -691.5, | |
| "logps/rejected": -806.5, | |
| "loss": 0.3885, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.720703125, | |
| "rewards/margins": 1.33642578125, | |
| "rewards/rejected": -5.05859375, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.77952, | |
| "grad_norm": 10.627712453917534, | |
| "learning_rate": 9.866669626370412e-08, | |
| "logits/chosen": -0.2711181640625, | |
| "logits/rejected": -0.32177734375, | |
| "logps/chosen": -739.5, | |
| "logps/rejected": -880.0, | |
| "loss": 0.4459, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.81640625, | |
| "rewards/margins": 1.3115234375, | |
| "rewards/rejected": -5.12109375, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.7808, | |
| "grad_norm": 9.075340025404623, | |
| "learning_rate": 9.757914342966495e-08, | |
| "logits/chosen": -0.17635345458984375, | |
| "logits/rejected": -0.23785400390625, | |
| "logps/chosen": -698.5, | |
| "logps/rejected": -799.5, | |
| "loss": 0.4333, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.68359375, | |
| "rewards/margins": 1.27197265625, | |
| "rewards/rejected": -4.958984375, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.78208, | |
| "grad_norm": 17.6370736058902, | |
| "learning_rate": 9.64966459317006e-08, | |
| "logits/chosen": -0.2579345703125, | |
| "logits/rejected": -0.31915283203125, | |
| "logps/chosen": -692.5, | |
| "logps/rejected": -871.0, | |
| "loss": 0.3659, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -3.849609375, | |
| "rewards/margins": 1.578125, | |
| "rewards/rejected": -5.421875, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.78336, | |
| "grad_norm": 10.37299929145861, | |
| "learning_rate": 9.541922544943294e-08, | |
| "logits/chosen": -0.2517547607421875, | |
| "logits/rejected": -0.315185546875, | |
| "logps/chosen": -712.5, | |
| "logps/rejected": -842.0, | |
| "loss": 0.4197, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.94140625, | |
| "rewards/margins": 1.3232421875, | |
| "rewards/rejected": -5.265625, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.78464, | |
| "grad_norm": 8.76113086501841, | |
| "learning_rate": 9.434690356080393e-08, | |
| "logits/chosen": -0.3062744140625, | |
| "logits/rejected": -0.37060546875, | |
| "logps/chosen": -683.0, | |
| "logps/rejected": -821.0, | |
| "loss": 0.4658, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.763671875, | |
| "rewards/margins": 1.23486328125, | |
| "rewards/rejected": -5.00390625, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.78592, | |
| "grad_norm": 12.189626085161583, | |
| "learning_rate": 9.327970174164408e-08, | |
| "logits/chosen": -0.16046142578125, | |
| "logits/rejected": -0.1937255859375, | |
| "logps/chosen": -695.5, | |
| "logps/rejected": -782.0, | |
| "loss": 0.534, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.953125, | |
| "rewards/margins": 0.9755859375, | |
| "rewards/rejected": -4.92578125, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7872, | |
| "grad_norm": 15.30898275082047, | |
| "learning_rate": 9.221764136524202e-08, | |
| "logits/chosen": -0.2682647705078125, | |
| "logits/rejected": -0.337371826171875, | |
| "logps/chosen": -689.5, | |
| "logps/rejected": -798.0, | |
| "loss": 0.417, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.775390625, | |
| "rewards/margins": 1.229736328125, | |
| "rewards/rejected": -5.0078125, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.78848, | |
| "grad_norm": 9.836705918506746, | |
| "learning_rate": 9.116074370191705e-08, | |
| "logits/chosen": -0.2236480712890625, | |
| "logits/rejected": -0.28759765625, | |
| "logps/chosen": -670.0, | |
| "logps/rejected": -761.0, | |
| "loss": 0.4567, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.65234375, | |
| "rewards/margins": 1.11376953125, | |
| "rewards/rejected": -4.765625, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.78976, | |
| "grad_norm": 9.891228631270444, | |
| "learning_rate": 9.010902991859196e-08, | |
| "logits/chosen": -0.19852447509765625, | |
| "logits/rejected": -0.21734619140625, | |
| "logps/chosen": -712.0, | |
| "logps/rejected": -813.5, | |
| "loss": 0.4786, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.890625, | |
| "rewards/margins": 1.10205078125, | |
| "rewards/rejected": -4.98828125, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.79104, | |
| "grad_norm": 10.619085582647367, | |
| "learning_rate": 8.906252107837054e-08, | |
| "logits/chosen": -0.24609375, | |
| "logits/rejected": -0.298095703125, | |
| "logps/chosen": -682.5, | |
| "logps/rejected": -806.0, | |
| "loss": 0.4685, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.83203125, | |
| "rewards/margins": 1.31005859375, | |
| "rewards/rejected": -5.1328125, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.79232, | |
| "grad_norm": 8.55320493819975, | |
| "learning_rate": 8.802123814011458e-08, | |
| "logits/chosen": -0.2899169921875, | |
| "logits/rejected": -0.3363037109375, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -820.5, | |
| "loss": 0.4403, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.98046875, | |
| "rewards/margins": 1.17822265625, | |
| "rewards/rejected": -5.15625, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.7936, | |
| "grad_norm": 9.70388545104585, | |
| "learning_rate": 8.698520195802499e-08, | |
| "logits/chosen": -0.26741790771484375, | |
| "logits/rejected": -0.2891845703125, | |
| "logps/chosen": -744.5, | |
| "logps/rejected": -860.0, | |
| "loss": 0.4775, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.951171875, | |
| "rewards/margins": 1.2177734375, | |
| "rewards/rejected": -5.171875, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.79488, | |
| "grad_norm": 12.896195486210319, | |
| "learning_rate": 8.595443328122345e-08, | |
| "logits/chosen": -0.253082275390625, | |
| "logits/rejected": -0.288360595703125, | |
| "logps/chosen": -699.0, | |
| "logps/rejected": -840.0, | |
| "loss": 0.4228, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.779296875, | |
| "rewards/margins": 1.33056640625, | |
| "rewards/rejected": -5.1171875, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.79616, | |
| "grad_norm": 10.316721984106971, | |
| "learning_rate": 8.492895275333704e-08, | |
| "logits/chosen": -0.242431640625, | |
| "logits/rejected": -0.29632568359375, | |
| "logps/chosen": -647.0, | |
| "logps/rejected": -806.5, | |
| "loss": 0.4143, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.740234375, | |
| "rewards/margins": 1.31005859375, | |
| "rewards/rejected": -5.05078125, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.79744, | |
| "grad_norm": 10.794420061393975, | |
| "learning_rate": 8.390878091208543e-08, | |
| "logits/chosen": -0.1590423583984375, | |
| "logits/rejected": -0.21337890625, | |
| "logps/chosen": -706.0, | |
| "logps/rejected": -777.0, | |
| "loss": 0.46, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.787109375, | |
| "rewards/margins": 1.115234375, | |
| "rewards/rejected": -4.90625, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.79872, | |
| "grad_norm": 8.48224104747155, | |
| "learning_rate": 8.289393818886838e-08, | |
| "logits/chosen": -0.1787109375, | |
| "logits/rejected": -0.205230712890625, | |
| "logps/chosen": -709.5, | |
| "logps/rejected": -812.0, | |
| "loss": 0.4332, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.748046875, | |
| "rewards/margins": 1.294921875, | |
| "rewards/rejected": -5.0546875, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 23.137925812568916, | |
| "learning_rate": 8.188444490835773e-08, | |
| "logits/chosen": -0.23029327392578125, | |
| "logits/rejected": -0.27685546875, | |
| "logps/chosen": -721.5, | |
| "logps/rejected": -848.0, | |
| "loss": 0.508, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.85546875, | |
| "rewards/margins": 1.2080078125, | |
| "rewards/rejected": -5.0546875, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.80128, | |
| "grad_norm": 9.512426613145827, | |
| "learning_rate": 8.088032128808952e-08, | |
| "logits/chosen": -0.16583251953125, | |
| "logits/rejected": -0.2113037109375, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -822.5, | |
| "loss": 0.4431, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.73046875, | |
| "rewards/margins": 1.28515625, | |
| "rewards/rejected": -5.021484375, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.80256, | |
| "grad_norm": 9.523485315600638, | |
| "learning_rate": 7.988158743805972e-08, | |
| "logits/chosen": -0.28082275390625, | |
| "logits/rejected": -0.3321533203125, | |
| "logps/chosen": -685.5, | |
| "logps/rejected": -781.0, | |
| "loss": 0.4848, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.615234375, | |
| "rewards/margins": 1.0703125, | |
| "rewards/rejected": -4.6875, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.80384, | |
| "grad_norm": 9.450418163489136, | |
| "learning_rate": 7.888826336032093e-08, | |
| "logits/chosen": -0.2333984375, | |
| "logits/rejected": -0.3123779296875, | |
| "logps/chosen": -714.5, | |
| "logps/rejected": -828.0, | |
| "loss": 0.4416, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.89453125, | |
| "rewards/margins": 1.34619140625, | |
| "rewards/rejected": -5.2421875, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.80512, | |
| "grad_norm": 13.14269906470784, | |
| "learning_rate": 7.790036894858197e-08, | |
| "logits/chosen": -0.24493408203125, | |
| "logits/rejected": -0.3167724609375, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -795.0, | |
| "loss": 0.4833, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.8125, | |
| "rewards/margins": 1.1923828125, | |
| "rewards/rejected": -5.0, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.8064, | |
| "grad_norm": 17.566771895062967, | |
| "learning_rate": 7.691792398780962e-08, | |
| "logits/chosen": -0.18544769287109375, | |
| "logits/rejected": -0.23531341552734375, | |
| "logps/chosen": -714.0, | |
| "logps/rejected": -850.0, | |
| "loss": 0.4277, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.693359375, | |
| "rewards/margins": 1.42333984375, | |
| "rewards/rejected": -5.1171875, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.80768, | |
| "grad_norm": 10.9996007916988, | |
| "learning_rate": 7.594094815383224e-08, | |
| "logits/chosen": -0.1927490234375, | |
| "logits/rejected": -0.25665283203125, | |
| "logps/chosen": -712.0, | |
| "logps/rejected": -824.5, | |
| "loss": 0.4316, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.744140625, | |
| "rewards/margins": 1.3701171875, | |
| "rewards/rejected": -5.10546875, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.80896, | |
| "grad_norm": 16.150017435469856, | |
| "learning_rate": 7.496946101294586e-08, | |
| "logits/chosen": -0.2930908203125, | |
| "logits/rejected": -0.3349609375, | |
| "logps/chosen": -733.5, | |
| "logps/rejected": -856.0, | |
| "loss": 0.423, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.951171875, | |
| "rewards/margins": 1.19189453125, | |
| "rewards/rejected": -5.140625, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.81024, | |
| "grad_norm": 22.47987317628445, | |
| "learning_rate": 7.400348202152192e-08, | |
| "logits/chosen": -0.12548828125, | |
| "logits/rejected": -0.17365264892578125, | |
| "logps/chosen": -735.0, | |
| "logps/rejected": -837.5, | |
| "loss": 0.5789, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -4.09765625, | |
| "rewards/margins": 0.86962890625, | |
| "rewards/rejected": -4.958984375, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.81152, | |
| "grad_norm": 15.769251930029661, | |
| "learning_rate": 7.304303052561841e-08, | |
| "logits/chosen": -0.16961669921875, | |
| "logits/rejected": -0.24853515625, | |
| "logps/chosen": -687.0, | |
| "logps/rejected": -828.5, | |
| "loss": 0.3706, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -3.740234375, | |
| "rewards/margins": 1.4990234375, | |
| "rewards/rejected": -5.23046875, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.8128, | |
| "grad_norm": 14.6903117429968, | |
| "learning_rate": 7.208812576059112e-08, | |
| "logits/chosen": -0.35009765625, | |
| "logits/rejected": -0.3636474609375, | |
| "logps/chosen": -746.5, | |
| "logps/rejected": -820.5, | |
| "loss": 0.5197, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.94140625, | |
| "rewards/margins": 1.1171875, | |
| "rewards/rejected": -5.06640625, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.81408, | |
| "grad_norm": 13.239985107487296, | |
| "learning_rate": 7.113878685070993e-08, | |
| "logits/chosen": -0.2686767578125, | |
| "logits/rejected": -0.2921142578125, | |
| "logps/chosen": -738.5, | |
| "logps/rejected": -828.5, | |
| "loss": 0.5338, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.21875, | |
| "rewards/margins": 0.9364013671875, | |
| "rewards/rejected": -5.1484375, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.81536, | |
| "grad_norm": 18.26280480241238, | |
| "learning_rate": 7.019503280877466e-08, | |
| "logits/chosen": -0.207672119140625, | |
| "logits/rejected": -0.2611083984375, | |
| "logps/chosen": -721.0, | |
| "logps/rejected": -863.5, | |
| "loss": 0.3718, | |
| "rewards/accuracies": 0.890625, | |
| "rewards/chosen": -3.927734375, | |
| "rewards/margins": 1.470703125, | |
| "rewards/rejected": -5.40234375, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.81664, | |
| "grad_norm": 11.402715846459442, | |
| "learning_rate": 6.925688253573465e-08, | |
| "logits/chosen": -0.165740966796875, | |
| "logits/rejected": -0.204833984375, | |
| "logps/chosen": -711.0, | |
| "logps/rejected": -823.0, | |
| "loss": 0.4429, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.888671875, | |
| "rewards/margins": 1.33349609375, | |
| "rewards/rejected": -5.22265625, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.81792, | |
| "grad_norm": 9.972091848028276, | |
| "learning_rate": 6.832435482031064e-08, | |
| "logits/chosen": -0.244384765625, | |
| "logits/rejected": -0.24786376953125, | |
| "logps/chosen": -713.5, | |
| "logps/rejected": -818.0, | |
| "loss": 0.445, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.828125, | |
| "rewards/margins": 1.15966796875, | |
| "rewards/rejected": -4.986328125, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.8192, | |
| "grad_norm": 15.756161036698968, | |
| "learning_rate": 6.739746833861759e-08, | |
| "logits/chosen": -0.227813720703125, | |
| "logits/rejected": -0.30035400390625, | |
| "logps/chosen": -700.5, | |
| "logps/rejected": -846.5, | |
| "loss": 0.4093, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.9375, | |
| "rewards/margins": 1.30615234375, | |
| "rewards/rejected": -5.25390625, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.82048, | |
| "grad_norm": 12.006378355004856, | |
| "learning_rate": 6.647624165379173e-08, | |
| "logits/chosen": -0.203277587890625, | |
| "logits/rejected": -0.27020263671875, | |
| "logps/chosen": -650.0, | |
| "logps/rejected": -774.0, | |
| "loss": 0.4311, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.59765625, | |
| "rewards/margins": 1.26904296875, | |
| "rewards/rejected": -4.8671875, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.82176, | |
| "grad_norm": 8.47930046439374, | |
| "learning_rate": 6.55606932156175e-08, | |
| "logits/chosen": -0.185546875, | |
| "logits/rejected": -0.24249267578125, | |
| "logps/chosen": -706.5, | |
| "logps/rejected": -811.5, | |
| "loss": 0.404, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.810546875, | |
| "rewards/margins": 1.3720703125, | |
| "rewards/rejected": -5.1796875, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.82304, | |
| "grad_norm": 8.893390613363472, | |
| "learning_rate": 6.46508413601595e-08, | |
| "logits/chosen": -0.22678756713867188, | |
| "logits/rejected": -0.26580810546875, | |
| "logps/chosen": -698.0, | |
| "logps/rejected": -811.5, | |
| "loss": 0.4419, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.79296875, | |
| "rewards/margins": 1.20361328125, | |
| "rewards/rejected": -4.99609375, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.82432, | |
| "grad_norm": 9.63687460143777, | |
| "learning_rate": 6.374670430939404e-08, | |
| "logits/chosen": -0.19427490234375, | |
| "logits/rejected": -0.2384033203125, | |
| "logps/chosen": -729.5, | |
| "logps/rejected": -863.0, | |
| "loss": 0.4036, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.90234375, | |
| "rewards/margins": 1.35791015625, | |
| "rewards/rejected": -5.2578125, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.8256, | |
| "grad_norm": 16.684775851774532, | |
| "learning_rate": 6.284830017084488e-08, | |
| "logits/chosen": -0.223876953125, | |
| "logits/rejected": -0.291748046875, | |
| "logps/chosen": -698.5, | |
| "logps/rejected": -857.0, | |
| "loss": 0.4179, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -4.015625, | |
| "rewards/margins": 1.4716796875, | |
| "rewards/rejected": -5.4921875, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.82688, | |
| "grad_norm": 20.24004777671895, | |
| "learning_rate": 6.195564693722028e-08, | |
| "logits/chosen": -0.24462890625, | |
| "logits/rejected": -0.2711181640625, | |
| "logps/chosen": -721.0, | |
| "logps/rejected": -830.5, | |
| "loss": 0.5167, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.87109375, | |
| "rewards/margins": 1.27490234375, | |
| "rewards/rejected": -5.140625, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.82816, | |
| "grad_norm": 9.754642974447716, | |
| "learning_rate": 6.1068762486053e-08, | |
| "logits/chosen": -0.10089111328125, | |
| "logits/rejected": -0.14161300659179688, | |
| "logps/chosen": -691.5, | |
| "logps/rejected": -848.5, | |
| "loss": 0.449, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.041015625, | |
| "rewards/margins": 1.12451171875, | |
| "rewards/rejected": -5.17578125, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.82944, | |
| "grad_norm": 8.380496897716654, | |
| "learning_rate": 6.018766457934177e-08, | |
| "logits/chosen": -0.157867431640625, | |
| "logits/rejected": -0.177886962890625, | |
| "logps/chosen": -722.0, | |
| "logps/rejected": -858.5, | |
| "loss": 0.4101, | |
| "rewards/accuracies": 0.8671875, | |
| "rewards/chosen": -3.986328125, | |
| "rewards/margins": 1.50341796875, | |
| "rewards/rejected": -5.49609375, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.83072, | |
| "grad_norm": 30.217667929527913, | |
| "learning_rate": 5.931237086319592e-08, | |
| "logits/chosen": -0.240966796875, | |
| "logits/rejected": -0.3031005859375, | |
| "logps/chosen": -713.0, | |
| "logps/rejected": -784.0, | |
| "loss": 0.6239, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -4.0, | |
| "rewards/margins": 0.900146484375, | |
| "rewards/rejected": -4.8984375, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 13.251227443569483, | |
| "learning_rate": 5.844289886748196e-08, | |
| "logits/chosen": -0.1904144287109375, | |
| "logits/rejected": -0.260955810546875, | |
| "logps/chosen": -729.0, | |
| "logps/rejected": -828.0, | |
| "loss": 0.4859, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.0078125, | |
| "rewards/margins": 1.28173828125, | |
| "rewards/rejected": -5.29296875, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.83328, | |
| "grad_norm": 10.941982402370376, | |
| "learning_rate": 5.7579266005472304e-08, | |
| "logits/chosen": -0.22198486328125, | |
| "logits/rejected": -0.2850189208984375, | |
| "logps/chosen": -731.0, | |
| "logps/rejected": -805.5, | |
| "loss": 0.4837, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.943359375, | |
| "rewards/margins": 1.12548828125, | |
| "rewards/rejected": -5.072265625, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.83456, | |
| "grad_norm": 11.451372721475789, | |
| "learning_rate": 5.672148957349661e-08, | |
| "logits/chosen": -0.2811279296875, | |
| "logits/rejected": -0.3087158203125, | |
| "logps/chosen": -737.5, | |
| "logps/rejected": -837.5, | |
| "loss": 0.5053, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.259765625, | |
| "rewards/margins": 1.033203125, | |
| "rewards/rejected": -5.29296875, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.83584, | |
| "grad_norm": 13.98757298272134, | |
| "learning_rate": 5.586958675059548e-08, | |
| "logits/chosen": -0.2623291015625, | |
| "logits/rejected": -0.28857421875, | |
| "logps/chosen": -730.0, | |
| "logps/rejected": -806.0, | |
| "loss": 0.5169, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.154296875, | |
| "rewards/margins": 0.978271484375, | |
| "rewards/rejected": -5.13671875, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.83712, | |
| "grad_norm": 12.725608737110653, | |
| "learning_rate": 5.502357459817639e-08, | |
| "logits/chosen": -0.3026123046875, | |
| "logits/rejected": -0.3392333984375, | |
| "logps/chosen": -785.5, | |
| "logps/rejected": -902.5, | |
| "loss": 0.4683, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.27734375, | |
| "rewards/margins": 1.37109375, | |
| "rewards/rejected": -5.6484375, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.8384, | |
| "grad_norm": 18.59261713230035, | |
| "learning_rate": 5.418347005967189e-08, | |
| "logits/chosen": -0.22393798828125, | |
| "logits/rejected": -0.2733154296875, | |
| "logps/chosen": -670.5, | |
| "logps/rejected": -854.0, | |
| "loss": 0.3584, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -3.83203125, | |
| "rewards/margins": 1.6171875, | |
| "rewards/rejected": -5.453125, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.83968, | |
| "grad_norm": 15.681430188596131, | |
| "learning_rate": 5.334928996020012e-08, | |
| "logits/chosen": -0.2601318359375, | |
| "logits/rejected": -0.2978515625, | |
| "logps/chosen": -706.0, | |
| "logps/rejected": -789.0, | |
| "loss": 0.4116, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.630859375, | |
| "rewards/margins": 1.21728515625, | |
| "rewards/rejected": -4.84375, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.84096, | |
| "grad_norm": 14.42329884280154, | |
| "learning_rate": 5.2521051006228475e-08, | |
| "logits/chosen": -0.16900634765625, | |
| "logits/rejected": -0.22943115234375, | |
| "logps/chosen": -719.0, | |
| "logps/rejected": -837.5, | |
| "loss": 0.4993, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.09765625, | |
| "rewards/margins": 1.08544921875, | |
| "rewards/rejected": -5.18359375, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.84224, | |
| "grad_norm": 8.370169088229156, | |
| "learning_rate": 5.169876978523828e-08, | |
| "logits/chosen": -0.2957763671875, | |
| "logits/rejected": -0.3585205078125, | |
| "logps/chosen": -735.0, | |
| "logps/rejected": -874.0, | |
| "loss": 0.4017, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -4.001953125, | |
| "rewards/margins": 1.5009765625, | |
| "rewards/rejected": -5.51171875, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.84352, | |
| "grad_norm": 9.292854951761994, | |
| "learning_rate": 5.088246276539292e-08, | |
| "logits/chosen": -0.2945556640625, | |
| "logits/rejected": -0.3333740234375, | |
| "logps/chosen": -772.5, | |
| "logps/rejected": -869.5, | |
| "loss": 0.4474, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.111328125, | |
| "rewards/margins": 1.32177734375, | |
| "rewards/rejected": -5.4296875, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.8448, | |
| "grad_norm": 12.841846778247005, | |
| "learning_rate": 5.0072146295208e-08, | |
| "logits/chosen": -0.3035888671875, | |
| "logits/rejected": -0.3436279296875, | |
| "logps/chosen": -690.0, | |
| "logps/rejected": -835.0, | |
| "loss": 0.3979, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.791015625, | |
| "rewards/margins": 1.52734375, | |
| "rewards/rejected": -5.31640625, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.84608, | |
| "grad_norm": 12.621963464933918, | |
| "learning_rate": 4.926783660322411e-08, | |
| "logits/chosen": -0.275390625, | |
| "logits/rejected": -0.326904296875, | |
| "logps/chosen": -724.0, | |
| "logps/rejected": -809.0, | |
| "loss": 0.4929, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.876953125, | |
| "rewards/margins": 1.166015625, | |
| "rewards/rejected": -5.046875, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.84736, | |
| "grad_norm": 9.6461916615264, | |
| "learning_rate": 4.846954979768149e-08, | |
| "logits/chosen": -0.308380126953125, | |
| "logits/rejected": -0.3372802734375, | |
| "logps/chosen": -733.0, | |
| "logps/rejected": -825.0, | |
| "loss": 0.4582, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.904296875, | |
| "rewards/margins": 0.998046875, | |
| "rewards/rejected": -4.90625, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.84864, | |
| "grad_norm": 12.039674839827219, | |
| "learning_rate": 4.7677301866197455e-08, | |
| "logits/chosen": -0.2691650390625, | |
| "logits/rejected": -0.311279296875, | |
| "logps/chosen": -731.5, | |
| "logps/rejected": -866.0, | |
| "loss": 0.4554, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.056640625, | |
| "rewards/margins": 1.3349609375, | |
| "rewards/rejected": -5.38671875, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.84992, | |
| "grad_norm": 8.987446887684213, | |
| "learning_rate": 4.689110867544645e-08, | |
| "logits/chosen": -0.169097900390625, | |
| "logits/rejected": -0.20819091796875, | |
| "logps/chosen": -667.5, | |
| "logps/rejected": -767.0, | |
| "loss": 0.4374, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.666015625, | |
| "rewards/margins": 1.1484375, | |
| "rewards/rejected": -4.8046875, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8512, | |
| "grad_norm": 14.644380809572876, | |
| "learning_rate": 4.611098597084226e-08, | |
| "logits/chosen": -0.1959228515625, | |
| "logits/rejected": -0.249267578125, | |
| "logps/chosen": -718.0, | |
| "logps/rejected": -857.5, | |
| "loss": 0.5053, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -4.177734375, | |
| "rewards/margins": 1.05908203125, | |
| "rewards/rejected": -5.23046875, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.85248, | |
| "grad_norm": 8.230847446292731, | |
| "learning_rate": 4.5336949376222274e-08, | |
| "logits/chosen": -0.2403564453125, | |
| "logits/rejected": -0.26849365234375, | |
| "logps/chosen": -734.5, | |
| "logps/rejected": -859.0, | |
| "loss": 0.3859, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -3.775390625, | |
| "rewards/margins": 1.490234375, | |
| "rewards/rejected": -5.2734375, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.85376, | |
| "grad_norm": 9.397546683815692, | |
| "learning_rate": 4.4569014393534986e-08, | |
| "logits/chosen": -0.22314453125, | |
| "logits/rejected": -0.26226806640625, | |
| "logps/chosen": -684.5, | |
| "logps/rejected": -783.0, | |
| "loss": 0.4507, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.8203125, | |
| "rewards/margins": 1.06103515625, | |
| "rewards/rejected": -4.87890625, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.85504, | |
| "grad_norm": 9.127958781532016, | |
| "learning_rate": 4.380719640252953e-08, | |
| "logits/chosen": -0.2354736328125, | |
| "logits/rejected": -0.2667236328125, | |
| "logps/chosen": -749.5, | |
| "logps/rejected": -839.5, | |
| "loss": 0.456, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.07421875, | |
| "rewards/margins": 1.228515625, | |
| "rewards/rejected": -5.30078125, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.85632, | |
| "grad_norm": 9.421323267573934, | |
| "learning_rate": 4.3051510660447335e-08, | |
| "logits/chosen": -0.295379638671875, | |
| "logits/rejected": -0.35400390625, | |
| "logps/chosen": -676.0, | |
| "logps/rejected": -819.0, | |
| "loss": 0.4437, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.71875, | |
| "rewards/margins": 1.28564453125, | |
| "rewards/rejected": -5.0078125, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.8576, | |
| "grad_norm": 10.534790363492052, | |
| "learning_rate": 4.230197230171693e-08, | |
| "logits/chosen": -0.3251953125, | |
| "logits/rejected": -0.375244140625, | |
| "logps/chosen": -760.5, | |
| "logps/rejected": -831.0, | |
| "loss": 0.4472, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.83984375, | |
| "rewards/margins": 1.1640625, | |
| "rewards/rejected": -4.998046875, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.85888, | |
| "grad_norm": 8.740923113820415, | |
| "learning_rate": 4.155859633765044e-08, | |
| "logits/chosen": -0.27716064453125, | |
| "logits/rejected": -0.295166015625, | |
| "logps/chosen": -698.0, | |
| "logps/rejected": -825.0, | |
| "loss": 0.459, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.890625, | |
| "rewards/margins": 1.258544921875, | |
| "rewards/rejected": -5.15234375, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.86016, | |
| "grad_norm": 11.346727433404798, | |
| "learning_rate": 4.08213976561435e-08, | |
| "logits/chosen": -0.2637290954589844, | |
| "logits/rejected": -0.3646240234375, | |
| "logps/chosen": -735.5, | |
| "logps/rejected": -866.5, | |
| "loss": 0.4928, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.146484375, | |
| "rewards/margins": 1.279296875, | |
| "rewards/rejected": -5.42578125, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.86144, | |
| "grad_norm": 10.398636636838543, | |
| "learning_rate": 4.009039102137657e-08, | |
| "logits/chosen": -0.2867431640625, | |
| "logits/rejected": -0.31884765625, | |
| "logps/chosen": -739.5, | |
| "logps/rejected": -823.0, | |
| "loss": 0.52, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.947265625, | |
| "rewards/margins": 1.0927734375, | |
| "rewards/rejected": -5.03515625, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.86272, | |
| "grad_norm": 13.027404663461114, | |
| "learning_rate": 3.936559107351939e-08, | |
| "logits/chosen": -0.25640869140625, | |
| "logits/rejected": -0.2908935546875, | |
| "logps/chosen": -682.0, | |
| "logps/rejected": -813.5, | |
| "loss": 0.3722, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.75, | |
| "rewards/margins": 1.4267578125, | |
| "rewards/rejected": -5.17578125, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 9.010470752300636, | |
| "learning_rate": 3.864701232843808e-08, | |
| "logits/chosen": -0.335205078125, | |
| "logits/rejected": -0.3759765625, | |
| "logps/chosen": -703.5, | |
| "logps/rejected": -835.0, | |
| "loss": 0.4373, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.712890625, | |
| "rewards/margins": 1.20849609375, | |
| "rewards/rejected": -4.921875, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.86528, | |
| "grad_norm": 9.78070477469183, | |
| "learning_rate": 3.7934669177404015e-08, | |
| "logits/chosen": -0.2587890625, | |
| "logits/rejected": -0.32421875, | |
| "logps/chosen": -728.5, | |
| "logps/rejected": -826.0, | |
| "loss": 0.4832, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.0625, | |
| "rewards/margins": 1.38916015625, | |
| "rewards/rejected": -5.453125, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.86656, | |
| "grad_norm": 12.145508203259396, | |
| "learning_rate": 3.722857588680574e-08, | |
| "logits/chosen": -0.2547607421875, | |
| "logits/rejected": -0.296630859375, | |
| "logps/chosen": -678.0, | |
| "logps/rejected": -843.5, | |
| "loss": 0.3965, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.734375, | |
| "rewards/margins": 1.4375, | |
| "rewards/rejected": -5.171875, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.86784, | |
| "grad_norm": 14.052967853348232, | |
| "learning_rate": 3.652874659786328e-08, | |
| "logits/chosen": -0.268218994140625, | |
| "logits/rejected": -0.321044921875, | |
| "logps/chosen": -726.0, | |
| "logps/rejected": -888.5, | |
| "loss": 0.366, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -3.98046875, | |
| "rewards/margins": 1.5517578125, | |
| "rewards/rejected": -5.53125, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.86912, | |
| "grad_norm": 8.818365407249955, | |
| "learning_rate": 3.583519532634516e-08, | |
| "logits/chosen": -0.2730865478515625, | |
| "logits/rejected": -0.3010711669921875, | |
| "logps/chosen": -701.0, | |
| "logps/rejected": -813.5, | |
| "loss": 0.4284, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.82421875, | |
| "rewards/margins": 1.16748046875, | |
| "rewards/rejected": -4.98828125, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.8704, | |
| "grad_norm": 12.067218675895933, | |
| "learning_rate": 3.514793596228702e-08, | |
| "logits/chosen": -0.2110137939453125, | |
| "logits/rejected": -0.283447265625, | |
| "logps/chosen": -705.0, | |
| "logps/rejected": -851.0, | |
| "loss": 0.3709, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.740234375, | |
| "rewards/margins": 1.58203125, | |
| "rewards/rejected": -5.32421875, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.87168, | |
| "grad_norm": 15.163206625422314, | |
| "learning_rate": 3.4466982269714396e-08, | |
| "logits/chosen": -0.2342529296875, | |
| "logits/rejected": -0.271240234375, | |
| "logps/chosen": -675.0, | |
| "logps/rejected": -841.5, | |
| "loss": 0.4759, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.54296875, | |
| "rewards/margins": 1.16650390625, | |
| "rewards/rejected": -4.70703125, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.87296, | |
| "grad_norm": 13.083926981225705, | |
| "learning_rate": 3.379234788636626e-08, | |
| "logits/chosen": -0.24346923828125, | |
| "logits/rejected": -0.260040283203125, | |
| "logps/chosen": -682.0, | |
| "logps/rejected": -784.0, | |
| "loss": 0.499, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.755859375, | |
| "rewards/margins": 1.13623046875, | |
| "rewards/rejected": -4.892578125, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.87424, | |
| "grad_norm": 9.209267131224845, | |
| "learning_rate": 3.31240463234221e-08, | |
| "logits/chosen": -0.2317047119140625, | |
| "logits/rejected": -0.2623291015625, | |
| "logps/chosen": -707.5, | |
| "logps/rejected": -845.5, | |
| "loss": 0.3632, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.77734375, | |
| "rewards/margins": 1.587890625, | |
| "rewards/rejected": -5.3671875, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.87552, | |
| "grad_norm": 10.57378130101519, | |
| "learning_rate": 3.246209096523176e-08, | |
| "logits/chosen": -0.233551025390625, | |
| "logits/rejected": -0.2782135009765625, | |
| "logps/chosen": -741.0, | |
| "logps/rejected": -830.0, | |
| "loss": 0.4497, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.083984375, | |
| "rewards/margins": 1.17626953125, | |
| "rewards/rejected": -5.26171875, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8768, | |
| "grad_norm": 12.296255284365948, | |
| "learning_rate": 3.180649506904667e-08, | |
| "logits/chosen": -0.2994384765625, | |
| "logits/rejected": -0.3275146484375, | |
| "logps/chosen": -699.5, | |
| "logps/rejected": -800.0, | |
| "loss": 0.4883, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.74609375, | |
| "rewards/margins": 1.09814453125, | |
| "rewards/rejected": -4.84765625, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.87808, | |
| "grad_norm": 12.690499499739776, | |
| "learning_rate": 3.115727176475508e-08, | |
| "logits/chosen": -0.2972412109375, | |
| "logits/rejected": -0.36767578125, | |
| "logps/chosen": -686.0, | |
| "logps/rejected": -812.0, | |
| "loss": 0.427, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.5546875, | |
| "rewards/margins": 1.294921875, | |
| "rewards/rejected": -4.8515625, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.87936, | |
| "grad_norm": 9.788929929625333, | |
| "learning_rate": 3.051443405461822e-08, | |
| "logits/chosen": -0.2645263671875, | |
| "logits/rejected": -0.296142578125, | |
| "logps/chosen": -722.0, | |
| "logps/rejected": -802.0, | |
| "loss": 0.4763, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.84765625, | |
| "rewards/margins": 1.03076171875, | |
| "rewards/rejected": -4.875, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.88064, | |
| "grad_norm": 11.417012062202968, | |
| "learning_rate": 2.987799481301091e-08, | |
| "logits/chosen": -0.282470703125, | |
| "logits/rejected": -0.306640625, | |
| "logps/chosen": -679.5, | |
| "logps/rejected": -821.0, | |
| "loss": 0.4559, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.6875, | |
| "rewards/margins": 1.14501953125, | |
| "rewards/rejected": -4.83984375, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.88192, | |
| "grad_norm": 11.033813131632254, | |
| "learning_rate": 2.924796678616297e-08, | |
| "logits/chosen": -0.2666015625, | |
| "logits/rejected": -0.3145751953125, | |
| "logps/chosen": -709.5, | |
| "logps/rejected": -838.0, | |
| "loss": 0.4804, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.859375, | |
| "rewards/margins": 1.21484375, | |
| "rewards/rejected": -5.07421875, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.8832, | |
| "grad_norm": 9.639277971365505, | |
| "learning_rate": 2.862436259190414e-08, | |
| "logits/chosen": -0.24395751953125, | |
| "logits/rejected": -0.2987060546875, | |
| "logps/chosen": -702.0, | |
| "logps/rejected": -878.5, | |
| "loss": 0.471, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.1015625, | |
| "rewards/margins": 1.365234375, | |
| "rewards/rejected": -5.46484375, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.88448, | |
| "grad_norm": 8.604549708205145, | |
| "learning_rate": 2.800719471941152e-08, | |
| "logits/chosen": -0.26934814453125, | |
| "logits/rejected": -0.3045654296875, | |
| "logps/chosen": -693.5, | |
| "logps/rejected": -802.5, | |
| "loss": 0.4883, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.83203125, | |
| "rewards/margins": 1.09423828125, | |
| "rewards/rejected": -4.9296875, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.88576, | |
| "grad_norm": 8.224767246318196, | |
| "learning_rate": 2.739647552895949e-08, | |
| "logits/chosen": -0.24432373046875, | |
| "logits/rejected": -0.268463134765625, | |
| "logps/chosen": -714.5, | |
| "logps/rejected": -807.0, | |
| "loss": 0.413, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -3.919921875, | |
| "rewards/margins": 1.251953125, | |
| "rewards/rejected": -5.162109375, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.88704, | |
| "grad_norm": 10.030626998099564, | |
| "learning_rate": 2.6792217251671745e-08, | |
| "logits/chosen": -0.239654541015625, | |
| "logits/rejected": -0.248565673828125, | |
| "logps/chosen": -754.5, | |
| "logps/rejected": -814.5, | |
| "loss": 0.435, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.072265625, | |
| "rewards/margins": 1.16650390625, | |
| "rewards/rejected": -5.23828125, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.88832, | |
| "grad_norm": 11.888659481208316, | |
| "learning_rate": 2.6194431989276773e-08, | |
| "logits/chosen": -0.2733154296875, | |
| "logits/rejected": -0.331298828125, | |
| "logps/chosen": -675.5, | |
| "logps/rejected": -841.5, | |
| "loss": 0.47, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -3.80078125, | |
| "rewards/margins": 1.370849609375, | |
| "rewards/rejected": -5.171875, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8896, | |
| "grad_norm": 9.80399886350075, | |
| "learning_rate": 2.5603131713865372e-08, | |
| "logits/chosen": -0.26300048828125, | |
| "logits/rejected": -0.3204345703125, | |
| "logps/chosen": -745.0, | |
| "logps/rejected": -841.0, | |
| "loss": 0.4927, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.22265625, | |
| "rewards/margins": 1.11181640625, | |
| "rewards/rejected": -5.33984375, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.89088, | |
| "grad_norm": 11.910638478728464, | |
| "learning_rate": 2.5018328267650796e-08, | |
| "logits/chosen": -0.264404296875, | |
| "logits/rejected": -0.276702880859375, | |
| "logps/chosen": -774.5, | |
| "logps/rejected": -894.0, | |
| "loss": 0.4589, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.185546875, | |
| "rewards/margins": 1.25048828125, | |
| "rewards/rejected": -5.43359375, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.89216, | |
| "grad_norm": 9.464215441247289, | |
| "learning_rate": 2.4440033362731626e-08, | |
| "logits/chosen": -0.2911376953125, | |
| "logits/rejected": -0.3267822265625, | |
| "logps/chosen": -739.5, | |
| "logps/rejected": -840.5, | |
| "loss": 0.4642, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -4.09375, | |
| "rewards/margins": 1.214111328125, | |
| "rewards/rejected": -5.30859375, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.89344, | |
| "grad_norm": 11.076879696440999, | |
| "learning_rate": 2.3868258580857163e-08, | |
| "logits/chosen": -0.2603759765625, | |
| "logits/rejected": -0.3201904296875, | |
| "logps/chosen": -723.0, | |
| "logps/rejected": -831.5, | |
| "loss": 0.4242, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.93359375, | |
| "rewards/margins": 1.27734375, | |
| "rewards/rejected": -5.2109375, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.89472, | |
| "grad_norm": 9.93416968714434, | |
| "learning_rate": 2.330301537319571e-08, | |
| "logits/chosen": -0.3101806640625, | |
| "logits/rejected": -0.36083984375, | |
| "logps/chosen": -727.5, | |
| "logps/rejected": -926.0, | |
| "loss": 0.4007, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.072265625, | |
| "rewards/margins": 1.513671875, | |
| "rewards/rejected": -5.5859375, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 8.352284052025437, | |
| "learning_rate": 2.2744315060104845e-08, | |
| "logits/chosen": -0.173828125, | |
| "logits/rejected": -0.23614501953125, | |
| "logps/chosen": -693.0, | |
| "logps/rejected": -882.5, | |
| "loss": 0.3667, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.837890625, | |
| "rewards/margins": 1.92333984375, | |
| "rewards/rejected": -5.76171875, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "eval_logits/chosen": -0.2226715087890625, | |
| "eval_logits/rejected": -0.30279541015625, | |
| "eval_logps/chosen": -703.0, | |
| "eval_logps/rejected": -800.5, | |
| "eval_loss": 0.4712187647819519, | |
| "eval_rewards/accuracies": 0.74609375, | |
| "eval_rewards/chosen": -3.8193359375, | |
| "eval_rewards/margins": 1.1708984375, | |
| "eval_rewards/rejected": -4.990234375, | |
| "eval_runtime": 27.3686, | |
| "eval_samples_per_second": 18.269, | |
| "eval_steps_per_second": 0.585, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.89728, | |
| "grad_norm": 9.595567585238719, | |
| "learning_rate": 2.2192168830904962e-08, | |
| "logits/chosen": -0.2698974609375, | |
| "logits/rejected": -0.294189453125, | |
| "logps/chosen": -675.5, | |
| "logps/rejected": -832.0, | |
| "loss": 0.4001, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.697265625, | |
| "rewards/margins": 1.458984375, | |
| "rewards/rejected": -5.15625, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.89856, | |
| "grad_norm": 12.777276327167845, | |
| "learning_rate": 2.164658774365529e-08, | |
| "logits/chosen": -0.28564453125, | |
| "logits/rejected": -0.334320068359375, | |
| "logps/chosen": -748.5, | |
| "logps/rejected": -879.5, | |
| "loss": 0.4901, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.021484375, | |
| "rewards/margins": 1.2333984375, | |
| "rewards/rejected": -5.25390625, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.89984, | |
| "grad_norm": 9.224895376105101, | |
| "learning_rate": 2.1107582724932088e-08, | |
| "logits/chosen": -0.229522705078125, | |
| "logits/rejected": -0.2841796875, | |
| "logps/chosen": -706.0, | |
| "logps/rejected": -798.5, | |
| "loss": 0.483, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.982421875, | |
| "rewards/margins": 1.16064453125, | |
| "rewards/rejected": -5.14453125, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.90112, | |
| "grad_norm": 9.774698117520298, | |
| "learning_rate": 2.0575164569610016e-08, | |
| "logits/chosen": -0.194671630859375, | |
| "logits/rejected": -0.2525634765625, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -818.0, | |
| "loss": 0.4548, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.830078125, | |
| "rewards/margins": 1.3994140625, | |
| "rewards/rejected": -5.23046875, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.9024, | |
| "grad_norm": 9.309057650942739, | |
| "learning_rate": 2.0049343940645937e-08, | |
| "logits/chosen": -0.20440673828125, | |
| "logits/rejected": -0.270263671875, | |
| "logps/chosen": -697.0, | |
| "logps/rejected": -826.5, | |
| "loss": 0.4319, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.767578125, | |
| "rewards/margins": 1.33984375, | |
| "rewards/rejected": -5.109375, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.90368, | |
| "grad_norm": 13.991840106741984, | |
| "learning_rate": 1.953013136886541e-08, | |
| "logits/chosen": -0.2295684814453125, | |
| "logits/rejected": -0.2913818359375, | |
| "logps/chosen": -684.5, | |
| "logps/rejected": -847.0, | |
| "loss": 0.3583, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -3.62890625, | |
| "rewards/margins": 1.5439453125, | |
| "rewards/rejected": -5.16796875, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.90496, | |
| "grad_norm": 8.819520427002814, | |
| "learning_rate": 1.901753725275166e-08, | |
| "logits/chosen": -0.14373779296875, | |
| "logits/rejected": -0.20538330078125, | |
| "logps/chosen": -721.5, | |
| "logps/rejected": -832.5, | |
| "loss": 0.4801, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.9609375, | |
| "rewards/margins": 1.1591796875, | |
| "rewards/rejected": -5.12109375, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.90624, | |
| "grad_norm": 13.348990809308994, | |
| "learning_rate": 1.8511571858237356e-08, | |
| "logits/chosen": -0.13494873046875, | |
| "logits/rejected": -0.16680908203125, | |
| "logps/chosen": -714.0, | |
| "logps/rejected": -808.5, | |
| "loss": 0.4172, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.765625, | |
| "rewards/margins": 1.22314453125, | |
| "rewards/rejected": -4.984375, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.90752, | |
| "grad_norm": 14.870724504070937, | |
| "learning_rate": 1.801224531849908e-08, | |
| "logits/chosen": -0.171142578125, | |
| "logits/rejected": -0.218017578125, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -773.0, | |
| "loss": 0.4573, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.64453125, | |
| "rewards/margins": 1.0498046875, | |
| "rewards/rejected": -4.69140625, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.9088, | |
| "grad_norm": 11.205083148950653, | |
| "learning_rate": 1.751956763375435e-08, | |
| "logits/chosen": -0.22489166259765625, | |
| "logits/rejected": -0.2996826171875, | |
| "logps/chosen": -690.5, | |
| "logps/rejected": -798.0, | |
| "loss": 0.4199, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -3.837890625, | |
| "rewards/margins": 1.08837890625, | |
| "rewards/rejected": -4.921875, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.91008, | |
| "grad_norm": 9.544243168409615, | |
| "learning_rate": 1.70335486710614e-08, | |
| "logits/chosen": -0.2523193359375, | |
| "logits/rejected": -0.2890625, | |
| "logps/chosen": -686.5, | |
| "logps/rejected": -831.0, | |
| "loss": 0.4423, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.88671875, | |
| "rewards/margins": 1.15478515625, | |
| "rewards/rejected": -5.0390625, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.91136, | |
| "grad_norm": 12.6996755573186, | |
| "learning_rate": 1.6554198164121263e-08, | |
| "logits/chosen": -0.2281494140625, | |
| "logits/rejected": -0.2470703125, | |
| "logps/chosen": -725.0, | |
| "logps/rejected": -819.5, | |
| "loss": 0.4172, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.76953125, | |
| "rewards/margins": 1.28759765625, | |
| "rewards/rejected": -5.0546875, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.91264, | |
| "grad_norm": 10.207786072588835, | |
| "learning_rate": 1.6081525713083427e-08, | |
| "logits/chosen": -0.20416259765625, | |
| "logits/rejected": -0.2568359375, | |
| "logps/chosen": -692.0, | |
| "logps/rejected": -842.0, | |
| "loss": 0.4328, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.908203125, | |
| "rewards/margins": 1.1376953125, | |
| "rewards/rejected": -5.04296875, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.91392, | |
| "grad_norm": 10.540319829243698, | |
| "learning_rate": 1.561554078435296e-08, | |
| "logits/chosen": -0.2947998046875, | |
| "logits/rejected": -0.3328857421875, | |
| "logps/chosen": -727.0, | |
| "logps/rejected": -864.0, | |
| "loss": 0.4035, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.021484375, | |
| "rewards/margins": 1.36328125, | |
| "rewards/rejected": -5.38671875, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.9152, | |
| "grad_norm": 15.698890404120037, | |
| "learning_rate": 1.5156252710401207e-08, | |
| "logits/chosen": -0.192352294921875, | |
| "logits/rejected": -0.27923583984375, | |
| "logps/chosen": -698.0, | |
| "logps/rejected": -886.0, | |
| "loss": 0.3362, | |
| "rewards/accuracies": 0.8671875, | |
| "rewards/chosen": -3.923828125, | |
| "rewards/margins": 1.6572265625, | |
| "rewards/rejected": -5.578125, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.91648, | |
| "grad_norm": 13.504679296790746, | |
| "learning_rate": 1.4703670689578884e-08, | |
| "logits/chosen": -0.232635498046875, | |
| "logits/rejected": -0.3350830078125, | |
| "logps/chosen": -711.0, | |
| "logps/rejected": -889.5, | |
| "loss": 0.3787, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -3.85546875, | |
| "rewards/margins": 1.46630859375, | |
| "rewards/rejected": -5.32421875, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.91776, | |
| "grad_norm": 15.869638544760475, | |
| "learning_rate": 1.4257803785931927e-08, | |
| "logits/chosen": -0.24554443359375, | |
| "logits/rejected": -0.268798828125, | |
| "logps/chosen": -677.0, | |
| "logps/rejected": -788.5, | |
| "loss": 0.5024, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.693359375, | |
| "rewards/margins": 1.0751953125, | |
| "rewards/rejected": -4.767578125, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.91904, | |
| "grad_norm": 9.319905258380075, | |
| "learning_rate": 1.3818660929019715e-08, | |
| "logits/chosen": -0.240478515625, | |
| "logits/rejected": -0.2955322265625, | |
| "logps/chosen": -679.5, | |
| "logps/rejected": -822.0, | |
| "loss": 0.4519, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -3.9453125, | |
| "rewards/margins": 1.27001953125, | |
| "rewards/rejected": -5.2109375, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.92032, | |
| "grad_norm": 14.445515309974768, | |
| "learning_rate": 1.3386250913736408e-08, | |
| "logits/chosen": -0.259307861328125, | |
| "logits/rejected": -0.30859375, | |
| "logps/chosen": -763.5, | |
| "logps/rejected": -907.0, | |
| "loss": 0.4477, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.28515625, | |
| "rewards/margins": 1.2734375, | |
| "rewards/rejected": -5.55859375, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.9216, | |
| "grad_norm": 13.180277849379051, | |
| "learning_rate": 1.2960582400134912e-08, | |
| "logits/chosen": -0.3209228515625, | |
| "logits/rejected": -0.3797607421875, | |
| "logps/chosen": -695.5, | |
| "logps/rejected": -893.5, | |
| "loss": 0.3956, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -3.771484375, | |
| "rewards/margins": 1.61767578125, | |
| "rewards/rejected": -5.390625, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.92288, | |
| "grad_norm": 12.337718937891417, | |
| "learning_rate": 1.2541663913253191e-08, | |
| "logits/chosen": -0.19183349609375, | |
| "logits/rejected": -0.258819580078125, | |
| "logps/chosen": -740.5, | |
| "logps/rejected": -852.5, | |
| "loss": 0.5031, | |
| "rewards/accuracies": 0.7265625, | |
| "rewards/chosen": -4.0703125, | |
| "rewards/margins": 1.23486328125, | |
| "rewards/rejected": -5.30078125, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.92416, | |
| "grad_norm": 9.489271375743723, | |
| "learning_rate": 1.2129503842943645e-08, | |
| "logits/chosen": -0.22479248046875, | |
| "logits/rejected": -0.24298095703125, | |
| "logps/chosen": -699.0, | |
| "logps/rejected": -837.5, | |
| "loss": 0.4083, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.0859375, | |
| "rewards/margins": 1.3935546875, | |
| "rewards/rejected": -5.48046875, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.92544, | |
| "grad_norm": 10.82430093944136, | |
| "learning_rate": 1.1724110443705115e-08, | |
| "logits/chosen": -0.24761962890625, | |
| "logits/rejected": -0.265869140625, | |
| "logps/chosen": -704.0, | |
| "logps/rejected": -775.0, | |
| "loss": 0.5147, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.76953125, | |
| "rewards/margins": 0.96337890625, | |
| "rewards/rejected": -4.734375, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.92672, | |
| "grad_norm": 10.645094500743772, | |
| "learning_rate": 1.1325491834517675e-08, | |
| "logits/chosen": -0.246551513671875, | |
| "logits/rejected": -0.2755126953125, | |
| "logps/chosen": -762.0, | |
| "logps/rejected": -847.5, | |
| "loss": 0.5002, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.234375, | |
| "rewards/margins": 1.134033203125, | |
| "rewards/rejected": -5.3671875, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 10.724174338748702, | |
| "learning_rate": 1.0933655998679653e-08, | |
| "logits/chosen": -0.199462890625, | |
| "logits/rejected": -0.2744140625, | |
| "logps/chosen": -688.0, | |
| "logps/rejected": -800.5, | |
| "loss": 0.4812, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.857421875, | |
| "rewards/margins": 1.195068359375, | |
| "rewards/rejected": -5.052734375, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.92928, | |
| "grad_norm": 9.575511353587679, | |
| "learning_rate": 1.0548610783648198e-08, | |
| "logits/chosen": -0.1815185546875, | |
| "logits/rejected": -0.19659423828125, | |
| "logps/chosen": -692.5, | |
| "logps/rejected": -795.5, | |
| "loss": 0.4822, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.77734375, | |
| "rewards/margins": 1.10498046875, | |
| "rewards/rejected": -4.88671875, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.93056, | |
| "grad_norm": 11.150367172489458, | |
| "learning_rate": 1.0170363900881796e-08, | |
| "logits/chosen": -0.236328125, | |
| "logits/rejected": -0.3133544921875, | |
| "logps/chosen": -714.0, | |
| "logps/rejected": -836.5, | |
| "loss": 0.4768, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.025390625, | |
| "rewards/margins": 1.1396484375, | |
| "rewards/rejected": -5.16015625, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.93184, | |
| "grad_norm": 9.876466042629044, | |
| "learning_rate": 9.798922925685992e-09, | |
| "logits/chosen": -0.14385986328125, | |
| "logits/rejected": -0.172760009765625, | |
| "logps/chosen": -773.0, | |
| "logps/rejected": -894.5, | |
| "loss": 0.4802, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.216796875, | |
| "rewards/margins": 1.3310546875, | |
| "rewards/rejected": -5.5546875, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.93312, | |
| "grad_norm": 13.034111105340324, | |
| "learning_rate": 9.434295297061668e-09, | |
| "logits/chosen": -0.2009124755859375, | |
| "logits/rejected": -0.24066162109375, | |
| "logps/chosen": -754.0, | |
| "logps/rejected": -856.0, | |
| "loss": 0.5133, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.19921875, | |
| "rewards/margins": 1.14404296875, | |
| "rewards/rejected": -5.34765625, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.9344, | |
| "grad_norm": 8.571599786583675, | |
| "learning_rate": 9.076488317555886e-09, | |
| "logits/chosen": -0.32958984375, | |
| "logits/rejected": -0.383056640625, | |
| "logps/chosen": -708.5, | |
| "logps/rejected": -856.5, | |
| "loss": 0.3725, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -3.923828125, | |
| "rewards/margins": 1.5029296875, | |
| "rewards/rejected": -5.43359375, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.93568, | |
| "grad_norm": 8.597215059260597, | |
| "learning_rate": 8.725509153115918e-09, | |
| "logits/chosen": -0.2611083984375, | |
| "logits/rejected": -0.32562255859375, | |
| "logps/chosen": -733.5, | |
| "logps/rejected": -851.5, | |
| "loss": 0.4326, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -4.072265625, | |
| "rewards/margins": 1.204833984375, | |
| "rewards/rejected": -5.275390625, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.93696, | |
| "grad_norm": 12.286472292592713, | |
| "learning_rate": 8.381364832945459e-09, | |
| "logits/chosen": -0.28240966796875, | |
| "logits/rejected": -0.30352783203125, | |
| "logps/chosen": -752.0, | |
| "logps/rejected": -879.5, | |
| "loss": 0.4621, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.296875, | |
| "rewards/margins": 1.1865234375, | |
| "rewards/rejected": -5.48046875, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.93824, | |
| "grad_norm": 11.941872693164612, | |
| "learning_rate": 8.044062249364047e-09, | |
| "logits/chosen": -0.18436813354492188, | |
| "logits/rejected": -0.243377685546875, | |
| "logps/chosen": -772.0, | |
| "logps/rejected": -890.5, | |
| "loss": 0.4532, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -4.326171875, | |
| "rewards/margins": 1.33349609375, | |
| "rewards/rejected": -5.65625, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.93952, | |
| "grad_norm": 10.424030696964069, | |
| "learning_rate": 7.713608157668921e-09, | |
| "logits/chosen": -0.21624755859375, | |
| "logits/rejected": -0.2452392578125, | |
| "logps/chosen": -746.0, | |
| "logps/rejected": -870.0, | |
| "loss": 0.4378, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -4.1875, | |
| "rewards/margins": 1.3876953125, | |
| "rewards/rejected": -5.57421875, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.9408, | |
| "grad_norm": 14.949460184750698, | |
| "learning_rate": 7.390009175999834e-09, | |
| "logits/chosen": -0.2874755859375, | |
| "logits/rejected": -0.319091796875, | |
| "logps/chosen": -735.0, | |
| "logps/rejected": -890.5, | |
| "loss": 0.3524, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -4.1015625, | |
| "rewards/margins": 1.51220703125, | |
| "rewards/rejected": -5.61328125, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.94208, | |
| "grad_norm": 9.527587985419053, | |
| "learning_rate": 7.073271785206314e-09, | |
| "logits/chosen": -0.279052734375, | |
| "logits/rejected": -0.291259765625, | |
| "logps/chosen": -720.0, | |
| "logps/rejected": -811.5, | |
| "loss": 0.4741, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.892578125, | |
| "rewards/margins": 1.220703125, | |
| "rewards/rejected": -5.11328125, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.94336, | |
| "grad_norm": 12.15877893601555, | |
| "learning_rate": 6.763402328718115e-09, | |
| "logits/chosen": -0.2161865234375, | |
| "logits/rejected": -0.2706298828125, | |
| "logps/chosen": -712.5, | |
| "logps/rejected": -831.0, | |
| "loss": 0.3963, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.826171875, | |
| "rewards/margins": 1.43603515625, | |
| "rewards/rejected": -5.26171875, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.94464, | |
| "grad_norm": 8.913274393831173, | |
| "learning_rate": 6.460407012417918e-09, | |
| "logits/chosen": -0.214202880859375, | |
| "logits/rejected": -0.218719482421875, | |
| "logps/chosen": -779.0, | |
| "logps/rejected": -907.5, | |
| "loss": 0.458, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -4.17578125, | |
| "rewards/margins": 1.3076171875, | |
| "rewards/rejected": -5.4921875, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.94592, | |
| "grad_norm": 13.019357743010502, | |
| "learning_rate": 6.164291904517333e-09, | |
| "logits/chosen": -0.22491455078125, | |
| "logits/rejected": -0.2719573974609375, | |
| "logps/chosen": -704.0, | |
| "logps/rejected": -834.5, | |
| "loss": 0.4764, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -3.91015625, | |
| "rewards/margins": 1.169921875, | |
| "rewards/rejected": -5.078125, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.9472, | |
| "grad_norm": 14.066472266902457, | |
| "learning_rate": 5.875062935435121e-09, | |
| "logits/chosen": -0.2550048828125, | |
| "logits/rejected": -0.27685546875, | |
| "logps/chosen": -748.5, | |
| "logps/rejected": -869.0, | |
| "loss": 0.4442, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.072265625, | |
| "rewards/margins": 1.40234375, | |
| "rewards/rejected": -5.47265625, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.94848, | |
| "grad_norm": 13.888471288481163, | |
| "learning_rate": 5.592725897678446e-09, | |
| "logits/chosen": -0.2496337890625, | |
| "logits/rejected": -0.2772216796875, | |
| "logps/chosen": -710.5, | |
| "logps/rejected": -837.0, | |
| "loss": 0.4921, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.01171875, | |
| "rewards/margins": 1.125, | |
| "rewards/rejected": -5.1328125, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.94976, | |
| "grad_norm": 9.351078828106235, | |
| "learning_rate": 5.317286445727193e-09, | |
| "logits/chosen": -0.1865997314453125, | |
| "logits/rejected": -0.22784423828125, | |
| "logps/chosen": -721.5, | |
| "logps/rejected": -857.0, | |
| "loss": 0.4165, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -4.095703125, | |
| "rewards/margins": 1.375, | |
| "rewards/rejected": -5.47265625, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.95104, | |
| "grad_norm": 8.64152279502156, | |
| "learning_rate": 5.048750095920151e-09, | |
| "logits/chosen": -0.2593994140625, | |
| "logits/rejected": -0.294921875, | |
| "logps/chosen": -705.0, | |
| "logps/rejected": -815.5, | |
| "loss": 0.4576, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -3.935546875, | |
| "rewards/margins": 1.132080078125, | |
| "rewards/rejected": -5.06640625, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.95232, | |
| "grad_norm": 14.443814841229473, | |
| "learning_rate": 4.787122226345014e-09, | |
| "logits/chosen": -0.17755126953125, | |
| "logits/rejected": -0.22381591796875, | |
| "logps/chosen": -720.0, | |
| "logps/rejected": -811.5, | |
| "loss": 0.5046, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -3.841796875, | |
| "rewards/margins": 1.265869140625, | |
| "rewards/rejected": -5.11328125, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9536, | |
| "grad_norm": 10.100416966960784, | |
| "learning_rate": 4.532408076730504e-09, | |
| "logits/chosen": -0.1763916015625, | |
| "logits/rejected": -0.219482421875, | |
| "logps/chosen": -746.5, | |
| "logps/rejected": -882.0, | |
| "loss": 0.4617, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -4.29296875, | |
| "rewards/margins": 1.4052734375, | |
| "rewards/rejected": -5.6953125, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.95488, | |
| "grad_norm": 16.507849057578323, | |
| "learning_rate": 4.284612748341421e-09, | |
| "logits/chosen": -0.1527099609375, | |
| "logits/rejected": -0.189727783203125, | |
| "logps/chosen": -747.5, | |
| "logps/rejected": -908.5, | |
| "loss": 0.3097, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -3.98828125, | |
| "rewards/margins": 1.7822265625, | |
| "rewards/rejected": -5.76953125, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.95616, | |
| "grad_norm": 14.09994700103146, | |
| "learning_rate": 4.0437412038764826e-09, | |
| "logits/chosen": -0.1898193359375, | |
| "logits/rejected": -0.2166748046875, | |
| "logps/chosen": -748.0, | |
| "logps/rejected": -843.0, | |
| "loss": 0.409, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.23046875, | |
| "rewards/margins": 1.23681640625, | |
| "rewards/rejected": -5.47265625, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.95744, | |
| "grad_norm": 9.05904451158265, | |
| "learning_rate": 3.80979826736893e-09, | |
| "logits/chosen": -0.25396728515625, | |
| "logits/rejected": -0.2950439453125, | |
| "logps/chosen": -742.0, | |
| "logps/rejected": -919.0, | |
| "loss": 0.383, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -4.208984375, | |
| "rewards/margins": 1.5869140625, | |
| "rewards/rejected": -5.79296875, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.95872, | |
| "grad_norm": 9.091126144194298, | |
| "learning_rate": 3.5827886240899998e-09, | |
| "logits/chosen": -0.2483062744140625, | |
| "logits/rejected": -0.3052978515625, | |
| "logps/chosen": -716.0, | |
| "logps/rejected": -846.0, | |
| "loss": 0.4343, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.90625, | |
| "rewards/margins": 1.361572265625, | |
| "rewards/rejected": -5.265625, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 10.017266905551178, | |
| "learning_rate": 3.3627168204549304e-09, | |
| "logits/chosen": -0.2337646484375, | |
| "logits/rejected": -0.277587890625, | |
| "logps/chosen": -712.5, | |
| "logps/rejected": -828.5, | |
| "loss": 0.4071, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.931640625, | |
| "rewards/margins": 1.38330078125, | |
| "rewards/rejected": -5.31640625, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.96128, | |
| "grad_norm": 14.829233382165352, | |
| "learning_rate": 3.149587263932035e-09, | |
| "logits/chosen": -0.312744140625, | |
| "logits/rejected": -0.321533203125, | |
| "logps/chosen": -753.5, | |
| "logps/rejected": -829.5, | |
| "loss": 0.5308, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.072265625, | |
| "rewards/margins": 1.01171875, | |
| "rewards/rejected": -5.08984375, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.96256, | |
| "grad_norm": 15.392472528414778, | |
| "learning_rate": 2.9434042229544544e-09, | |
| "logits/chosen": -0.29705810546875, | |
| "logits/rejected": -0.330322265625, | |
| "logps/chosen": -716.5, | |
| "logps/rejected": -865.5, | |
| "loss": 0.3916, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.953125, | |
| "rewards/margins": 1.419921875, | |
| "rewards/rejected": -5.37109375, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.96384, | |
| "grad_norm": 9.893884489429142, | |
| "learning_rate": 2.744171826834474e-09, | |
| "logits/chosen": -0.2510986328125, | |
| "logits/rejected": -0.26873779296875, | |
| "logps/chosen": -753.0, | |
| "logps/rejected": -834.0, | |
| "loss": 0.4875, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -3.814453125, | |
| "rewards/margins": 1.1533203125, | |
| "rewards/rejected": -4.9765625, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.96512, | |
| "grad_norm": 14.07898237216757, | |
| "learning_rate": 2.5518940656811094e-09, | |
| "logits/chosen": -0.281005859375, | |
| "logits/rejected": -0.3043212890625, | |
| "logps/chosen": -718.5, | |
| "logps/rejected": -836.0, | |
| "loss": 0.5053, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.9921875, | |
| "rewards/margins": 1.32275390625, | |
| "rewards/rejected": -5.3125, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9664, | |
| "grad_norm": 15.140775410792148, | |
| "learning_rate": 2.366574790319942e-09, | |
| "logits/chosen": -0.1983642578125, | |
| "logits/rejected": -0.257568359375, | |
| "logps/chosen": -697.5, | |
| "logps/rejected": -834.5, | |
| "loss": 0.4643, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.939453125, | |
| "rewards/margins": 1.399169921875, | |
| "rewards/rejected": -5.33984375, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.96768, | |
| "grad_norm": 20.58163084682585, | |
| "learning_rate": 2.188217712216217e-09, | |
| "logits/chosen": -0.274169921875, | |
| "logits/rejected": -0.28857421875, | |
| "logps/chosen": -736.0, | |
| "logps/rejected": -826.0, | |
| "loss": 0.3668, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -3.97265625, | |
| "rewards/margins": 1.29736328125, | |
| "rewards/rejected": -5.2734375, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.96896, | |
| "grad_norm": 13.921962866937543, | |
| "learning_rate": 2.01682640340024e-09, | |
| "logits/chosen": -0.2674560546875, | |
| "logits/rejected": -0.28857421875, | |
| "logps/chosen": -742.5, | |
| "logps/rejected": -847.5, | |
| "loss": 0.4826, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -4.158203125, | |
| "rewards/margins": 1.14892578125, | |
| "rewards/rejected": -5.30859375, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.97024, | |
| "grad_norm": 10.786302286042769, | |
| "learning_rate": 1.8524042963961096e-09, | |
| "logits/chosen": -0.2705078125, | |
| "logits/rejected": -0.306640625, | |
| "logps/chosen": -766.5, | |
| "logps/rejected": -916.5, | |
| "loss": 0.4616, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.330078125, | |
| "rewards/margins": 1.359375, | |
| "rewards/rejected": -5.6796875, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.97152, | |
| "grad_norm": 8.73289653841681, | |
| "learning_rate": 1.6949546841528607e-09, | |
| "logits/chosen": -0.16156005859375, | |
| "logits/rejected": -0.21087646484375, | |
| "logps/chosen": -673.0, | |
| "logps/rejected": -794.5, | |
| "loss": 0.4352, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.763671875, | |
| "rewards/margins": 1.30078125, | |
| "rewards/rejected": -5.064453125, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.9728, | |
| "grad_norm": 11.011261520088386, | |
| "learning_rate": 1.5444807199784471e-09, | |
| "logits/chosen": -0.217681884765625, | |
| "logits/rejected": -0.2626953125, | |
| "logps/chosen": -699.0, | |
| "logps/rejected": -835.5, | |
| "loss": 0.4936, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -3.9140625, | |
| "rewards/margins": 1.07958984375, | |
| "rewards/rejected": -4.9921875, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.97408, | |
| "grad_norm": 20.320010738404367, | |
| "learning_rate": 1.4009854174767521e-09, | |
| "logits/chosen": -0.29150390625, | |
| "logits/rejected": -0.3443603515625, | |
| "logps/chosen": -717.5, | |
| "logps/rejected": -860.5, | |
| "loss": 0.5028, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.16796875, | |
| "rewards/margins": 1.07177734375, | |
| "rewards/rejected": -5.23828125, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.97536, | |
| "grad_norm": 12.1440388856, | |
| "learning_rate": 1.264471650487009e-09, | |
| "logits/chosen": -0.253204345703125, | |
| "logits/rejected": -0.306884765625, | |
| "logps/chosen": -789.5, | |
| "logps/rejected": -921.5, | |
| "loss": 0.3703, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -4.087890625, | |
| "rewards/margins": 1.7041015625, | |
| "rewards/rejected": -5.80078125, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.97664, | |
| "grad_norm": 19.756277043960345, | |
| "learning_rate": 1.1349421530265247e-09, | |
| "logits/chosen": -0.25432395935058594, | |
| "logits/rejected": -0.265899658203125, | |
| "logps/chosen": -721.5, | |
| "logps/rejected": -802.0, | |
| "loss": 0.5194, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -4.05859375, | |
| "rewards/margins": 1.168212890625, | |
| "rewards/rejected": -5.2265625, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.97792, | |
| "grad_norm": 11.30781557188891, | |
| "learning_rate": 1.0123995192356182e-09, | |
| "logits/chosen": -0.27215576171875, | |
| "logits/rejected": -0.3157958984375, | |
| "logps/chosen": -729.5, | |
| "logps/rejected": -847.5, | |
| "loss": 0.4481, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -4.09375, | |
| "rewards/margins": 1.2841796875, | |
| "rewards/rejected": -5.37890625, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9792, | |
| "grad_norm": 11.090064368669557, | |
| "learning_rate": 8.968462033259405e-10, | |
| "logits/chosen": -0.2755126953125, | |
| "logits/rejected": -0.30224609375, | |
| "logps/chosen": -734.0, | |
| "logps/rejected": -863.5, | |
| "loss": 0.4139, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -4.12890625, | |
| "rewards/margins": 1.259765625, | |
| "rewards/rejected": -5.390625, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.98048, | |
| "grad_norm": 8.905708808696662, | |
| "learning_rate": 7.882845195312016e-10, | |
| "logits/chosen": -0.2078857421875, | |
| "logits/rejected": -0.2855224609375, | |
| "logps/chosen": -683.25, | |
| "logps/rejected": -863.5, | |
| "loss": 0.3897, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -4.033203125, | |
| "rewards/margins": 1.23291015625, | |
| "rewards/rejected": -5.26953125, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.98176, | |
| "grad_norm": 13.207336578714601, | |
| "learning_rate": 6.867166420607362e-10, | |
| "logits/chosen": -0.230712890625, | |
| "logits/rejected": -0.24365234375, | |
| "logps/chosen": -749.0, | |
| "logps/rejected": -829.5, | |
| "loss": 0.4359, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -4.16796875, | |
| "rewards/margins": 1.08740234375, | |
| "rewards/rejected": -5.2578125, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.98304, | |
| "grad_norm": 15.119029537418763, | |
| "learning_rate": 5.921446050561385e-10, | |
| "logits/chosen": -0.179931640625, | |
| "logits/rejected": -0.24912261962890625, | |
| "logps/chosen": -678.5, | |
| "logps/rejected": -854.0, | |
| "loss": 0.3582, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -3.90625, | |
| "rewards/margins": 1.5869140625, | |
| "rewards/rejected": -5.49609375, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.98432, | |
| "grad_norm": 18.31926688721905, | |
| "learning_rate": 5.045703025503834e-10, | |
| "logits/chosen": -0.200286865234375, | |
| "logits/rejected": -0.2401123046875, | |
| "logps/chosen": -715.5, | |
| "logps/rejected": -845.5, | |
| "loss": 0.5413, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -3.947265625, | |
| "rewards/margins": 1.12841796875, | |
| "rewards/rejected": -5.0703125, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.9856, | |
| "grad_norm": 9.632067095301219, | |
| "learning_rate": 4.2399548842994017e-10, | |
| "logits/chosen": -0.2724609375, | |
| "logits/rejected": -0.30517578125, | |
| "logps/chosen": -731.5, | |
| "logps/rejected": -849.5, | |
| "loss": 0.4275, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -3.91796875, | |
| "rewards/margins": 1.27099609375, | |
| "rewards/rejected": -5.1875, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.98688, | |
| "grad_norm": 20.57119149988017, | |
| "learning_rate": 3.5042177639972304e-10, | |
| "logits/chosen": -0.143798828125, | |
| "logits/rejected": -0.193359375, | |
| "logps/chosen": -734.0, | |
| "logps/rejected": -852.0, | |
| "loss": 0.5429, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.04296875, | |
| "rewards/margins": 1.1286468505859375, | |
| "rewards/rejected": -5.16796875, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.98816, | |
| "grad_norm": 20.50073312922875, | |
| "learning_rate": 2.8385063995064463e-10, | |
| "logits/chosen": -0.21649169921875, | |
| "logits/rejected": -0.2684326171875, | |
| "logps/chosen": -775.0, | |
| "logps/rejected": -883.5, | |
| "loss": 0.4822, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -4.328125, | |
| "rewards/margins": 1.306640625, | |
| "rewards/rejected": -5.6328125, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.98944, | |
| "grad_norm": 10.125073496731055, | |
| "learning_rate": 2.2428341233012293e-10, | |
| "logits/chosen": -0.2061920166015625, | |
| "logits/rejected": -0.2589263916015625, | |
| "logps/chosen": -753.0, | |
| "logps/rejected": -847.0, | |
| "loss": 0.4463, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -4.240234375, | |
| "rewards/margins": 1.12890625, | |
| "rewards/rejected": -5.3671875, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.99072, | |
| "grad_norm": 8.724529533275442, | |
| "learning_rate": 1.7172128651554151e-10, | |
| "logits/chosen": -0.259307861328125, | |
| "logits/rejected": -0.2984619140625, | |
| "logps/chosen": -720.5, | |
| "logps/rejected": -837.0, | |
| "loss": 0.4245, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.974609375, | |
| "rewards/margins": 1.40234375, | |
| "rewards/rejected": -5.37890625, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 10.031507803839778, | |
| "learning_rate": 1.2616531519011876e-10, | |
| "logits/chosen": -0.2186279296875, | |
| "logits/rejected": -0.2698974609375, | |
| "logps/chosen": -761.5, | |
| "logps/rejected": -877.0, | |
| "loss": 0.4012, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -4.119140625, | |
| "rewards/margins": 1.47802734375, | |
| "rewards/rejected": -5.58984375, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.99328, | |
| "grad_norm": 11.25971739653657, | |
| "learning_rate": 8.761641072196346e-11, | |
| "logits/chosen": -0.23828125, | |
| "logits/rejected": -0.3046875, | |
| "logps/chosen": -719.0, | |
| "logps/rejected": -844.5, | |
| "loss": 0.451, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -4.099609375, | |
| "rewards/margins": 1.158203125, | |
| "rewards/rejected": -5.2578125, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.99456, | |
| "grad_norm": 11.91792314235709, | |
| "learning_rate": 5.607534514585066e-11, | |
| "logits/chosen": -0.221282958984375, | |
| "logits/rejected": -0.26171875, | |
| "logps/chosen": -714.0, | |
| "logps/rejected": -831.5, | |
| "loss": 0.4559, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.826171875, | |
| "rewards/margins": 1.36328125, | |
| "rewards/rejected": -5.1875, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.99584, | |
| "grad_norm": 9.429202223984744, | |
| "learning_rate": 3.154275014763952e-11, | |
| "logits/chosen": -0.1851806640625, | |
| "logits/rejected": -0.23614501953125, | |
| "logps/chosen": -694.0, | |
| "logps/rejected": -843.0, | |
| "loss": 0.3918, | |
| "rewards/accuracies": 0.8046875, | |
| "rewards/chosen": -3.8984375, | |
| "rewards/margins": 1.54296875, | |
| "rewards/rejected": -5.4453125, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.99712, | |
| "grad_norm": 9.182804511873856, | |
| "learning_rate": 1.4019117051683461e-11, | |
| "logits/chosen": -0.185791015625, | |
| "logits/rejected": -0.255859375, | |
| "logps/chosen": -720.5, | |
| "logps/rejected": -881.5, | |
| "loss": 0.3743, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -3.85546875, | |
| "rewards/margins": 1.4609375, | |
| "rewards/rejected": -5.3203125, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.9984, | |
| "grad_norm": 13.685193571545302, | |
| "learning_rate": 3.504796810921418e-12, | |
| "logits/chosen": -0.2198486328125, | |
| "logits/rejected": -0.26318359375, | |
| "logps/chosen": -672.0, | |
| "logps/rejected": -777.0, | |
| "loss": 0.4135, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -3.607421875, | |
| "rewards/margins": 1.242919921875, | |
| "rewards/rejected": -4.845703125, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.99968, | |
| "grad_norm": 9.030651964482605, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -0.1768798828125, | |
| "logits/rejected": -0.21502685546875, | |
| "logps/chosen": -735.5, | |
| "logps/rejected": -832.5, | |
| "loss": 0.4697, | |
| "rewards/accuracies": 0.7734375, | |
| "rewards/chosen": -4.044921875, | |
| "rewards/margins": 1.16845703125, | |
| "rewards/rejected": -5.21484375, | |
| "step": 781 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 781, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |