method: lora_sdpo base_model_id: allenai/OLMo-2-1124-7B-Instruct seed: 42 exp_name: p1_sdpo_multimodel_trial git_commit: 0a703a3b9fa4a2fe6be6ab5621e40883fd67118c dataset: allenai/ai2_arc dataset_slug: arc_challenge manifest_path: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/manifest.yaml tags: phase: P1 domain: science hyperparams: model: base_model_id: allenai/OLMo-2-1124-7B-Instruct model_family: olmo target_modules: - q_proj - k_proj - v_proj - o_proj - gate_proj - up_proj - down_proj dataset: name: allenai/ai2_arc split: train text_field: question max_samples: 50 eval_samples: 256 config: ARC-Challenge domain: science slug: arc_challenge format: arc sequence: max_length: 2048 packing: true lora: r: 16 alpha: 32 dropout: 0.05 target_modules: - q_proj - k_proj - v_proj - o_proj - gate_proj - up_proj - down_proj optimization: num_train_epochs: 1 per_device_batch_size: 1 gradient_accumulation_steps: 2 learning_rate: 5.0e-05 warmup_ratio: 0.0 weight_decay: 0.0 lr_scheduler_type: cosine max_grad_norm: 1.0 checkpointing: num_checkpoints: 8 save_total_limit: 64 schedule: log save_steps: null runtime: logging_steps: 20 bf16: true gradient_checkpointing: true wandb: true wandb_project: amr-fma-train hf_push: true hf_org: tkwiecinski hf_visibility: public force_restart: false sdpo: reward: gsm8k_match num_generations: 4 generation_batch_size: 64 steps_per_generation: null max_prompt_length: 512 max_completion_length: 1024 distillation_alpha: 1.0 distillation_topk: null distillation_weight: 1.0 distillation_is_clip: 2.0 full_logit_distillation: false policy_loss_mode: distillation_only teacher_regularization: ema teacher_update_rate: null success_reward_threshold: 1.0 use_successful_as_teacher: true include_environment_feedback: false feedback_column: null beta: 0.0 epsilon: 0.2 scale_rewards: group mask_truncated_completions: true dump_rollouts: true temperature: 1.3 evaluation: enabled: false eval_steps: null strategy: null prompt_style: system_prompt: Please reason step by step, and put your final answer within \boxed{}. style: boxed final_adapter_path: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/adapter_final total_steps: 64 checkpoints: - step: 1 dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-1 artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-1 metadata: source: trainer_on_save metrics: eval_loss: 0.0 eval_runtime: 1737.7203 eval_samples_per_second: 0.023 eval_steps_per_second: 0.006 eval_perplexity: 1.0 hf_revision: step-00001 hf_commit: a65627ced5a738a6fbbf63f1239fcff362cc2463 - step: 3 dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-3 artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-3 metadata: source: trainer_on_save metrics: eval_loss: 0.0 eval_runtime: 976.0781 eval_samples_per_second: 0.01 eval_steps_per_second: 0.003 eval_perplexity: 1.0 hf_revision: step-00003 hf_commit: 8289289634e4cd8ecb860c0149be6605d99403a7 - step: 5 dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-5 artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-5 metadata: source: trainer_on_save metrics: eval_loss: 0.0 eval_runtime: 689.7017 eval_samples_per_second: 0.014 eval_steps_per_second: 0.004 eval_perplexity: 1.0 hf_revision: step-00005 hf_commit: 5e45883a335d04a20e32e9a1869e09223768b7e4 - step: 10 dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-10 artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-10 metadata: source: trainer_on_save metrics: eval_loss: 0.0 eval_runtime: 687.1543 eval_samples_per_second: 0.015 eval_steps_per_second: 0.004 eval_perplexity: 1.0 hf_revision: step-00010 hf_commit: ab6969ff56c49251b6d78b50e0933dfe7b125281 - step: 19 dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-19 artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-19 metadata: source: trainer_on_save metrics: eval_loss: 0.0 eval_runtime: 639.9336 eval_samples_per_second: 0.016 eval_steps_per_second: 0.005 eval_perplexity: 1.0 hf_revision: step-00019 hf_commit: b124ebbca6610e5f52404bf0293564970e095669 - step: 35 dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-35 artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-35 metadata: source: trainer_on_save metrics: eval_loss: 0.0 eval_runtime: 727.0752 eval_samples_per_second: 0.014 eval_steps_per_second: 0.004 eval_perplexity: 1.0 hf_revision: step-00035 hf_commit: fea3c0826d56d2acaa31c37d9043284fc3498a9b - step: 63 dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-63 artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-63 metadata: source: trainer_on_save metrics: eval_loss: 0.0 eval_runtime: 741.7922 eval_samples_per_second: 0.013 eval_steps_per_second: 0.004 eval_perplexity: 1.0 hf_revision: step-00063 hf_commit: deca286dbe3d7475e736048548229a45cfe17105 - step: 64 dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-64 artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-64 metadata: source: trainer_on_save metrics: eval_loss: 0.0 eval_runtime: 771.2344 eval_samples_per_second: 0.013 eval_steps_per_second: 0.004 eval_perplexity: 1.0 hf_revision: step-00064 hf_commit: 9968e416d14e56c2f4ec3a85d9a56158842e6dee wandb_run_id: 5m88ybj7 wandb_eval_run_ids: {} hf_repo_id: tkwiecinski/amr-fma-OLMo-2-1124-7B-Instruct-lora_sdpo-arc_challenge-p1_sdpo_multimodel_trial-s42