openai/gsm8k
Benchmark • Updated • 17.6k • 889k • 1.39k
GRPO experiment from the TinkerRL-Bench world-class experiment suite.
[
1.0,
0.8125,
1.0,
1.0,
1.0,
0.5,
0.5,
0.875,
0.625,
0.8125,
1.0,
1.0,
1.0,
0.625,
0.875,
0.25,
1.0,
0.875,
0.6875,
0.5625,
0.9375,
0.5,
0.9375,
0.5,
1.0,
1.0,
0.625,
1.0,
1.0,
1.0
]
@misc{tinker-rl-bench-2026,
  title  = {{TinkerRL-Bench}: A Unified Benchmark for {RL} Post-Training},
  author = {Arvind C R and Sandhya Jeyaraj and Madhu Kumara L and Mohammad Rafi and Dhruva N Murthy and Arumugam K},
  year   = {2026},
  url    = {https://github.com/arvindcr4/tinker-rl-lab},
}