model=/data/pretrained_models/Qwen3.5-2B data=/home/lg/workflow_tooluse/Flow_RL_luogan/temp/metamath/metamath-output/setmm-train-qwen35-4b-mixed-12000 max_length=6144 micro_batch_size=2 gradient_accumulation_steps=8 effective_batch_size=16 learning_rate=1e-4 num_train_epochs=3 direct_ref_mode=same-file-distractors same_file_distractor_direct_refs=4 distractor_seed=0 gpu_ids=1