Upload src/rl_training/rewards.py with huggingface_hub
Browse files
src/rl_training/rewards.py
CHANGED
|
@@ -230,7 +230,7 @@ def compute_binary_rewards(
|
|
| 230 |
elif non_obvious:
|
| 231 |
reward = 0.8
|
| 232 |
elif eval_reversal:
|
| 233 |
-
reward =
|
| 234 |
else:
|
| 235 |
if w_deep > 0.65:
|
| 236 |
proximity = max(0.0, 0.65 - w2_deep) / 0.65
|
|
|
|
| 230 |
elif non_obvious:
|
| 231 |
reward = 0.8
|
| 232 |
elif eval_reversal:
|
| 233 |
+
reward = 0.0
|
| 234 |
else:
|
| 235 |
if w_deep > 0.65:
|
| 236 |
proximity = max(0.0, 0.65 - w2_deep) / 0.65
|