quasar-mixed-v2 / train_state.json
michael-chan-000's picture
Upload model
c5f32fa verified
Raw
History Blame Contribute Delete
962 Bytes
{
"global_step": 5000,
"bucket_weights": {
"aime": 4897.0,
"capability": 8000.0,
"chat_singleturn": 3000.0,
"code_proc": 18000.0,
"ifeval_proc": 528.0,
"judge": 4500.0,
"long_context": 17500.0,
"math_proc": 27500.0,
"mbpp_proc": 10000.0,
"reasoning_proc": 6511.0,
"tool_use_proc": 20000.0,
"chat_turns": 12000.0
},
"bucket_rows": {
"aime": 4897,
"capability": 8000,
"chat_singleturn": 3000,
"code_proc": 18000,
"ifeval_proc": 528,
"judge": 4500,
"long_context": 17500,
"math_proc": 27500,
"mbpp_proc": 10000,
"reasoning_proc": 6511,
"tool_use_proc": 20000,
"chat_turns": 12000
},
"min_assistant_chars": 0,
"per_row_ce": true,
"per_row_ce_length_norm": true,
"length_norm_cap": 256,
"max_steps": 6000,
"warmup_steps": 600,
"lr": 2e-05,
"seed": 42,
"aux_coeff": 0.001,
"router_lr_scale": 0.1,
"aux_only": false,
"phase": "sft_mixed"
}