{ "epoch": 1.0, "eval_logits/chosen": -4.129703998565674, "eval_logits/rejected": -4.016438007354736, "eval_logps/chosen": -399.9466857910156, "eval_logps/rejected": -319.7874450683594, "eval_loss": 0.5375946164131165, "eval_rewards/accuracies": 0.7229999899864197, "eval_rewards/chosen": 0.2708839476108551, "eval_rewards/margins": 0.675108790397644, "eval_rewards/rejected": -0.40422478318214417, "eval_runtime": 768.009, "eval_samples": 2000, "eval_samples_per_second": 2.604, "eval_steps_per_second": 0.651, "train_loss": 0.5658997891486184, "train_runtime": 38098.3192, "train_samples": 61966, "train_samples_per_second": 1.626, "train_steps_per_second": 0.025 }