{ "compute_tier": "BIGGPU", "base_model": "unsloth/Qwen2.5-7B-bnb-4bit", "beta": 0.1, "lr": 5e-07, "epochs": 1, "final_train_loss": 0.8024775886535644, "end_chosen_reward": -0.6821190863847733, "end_rejected_reward": -0.7446401417255402, "end_reward_gap": 0.0625210553407669 }