# Training-run manifest: top-level identity of one run.
# The base model name, method, dataset slug, experiment name and seed ("s42")
# recorded here also appear as path components of manifest_path below.
method: lora_sdpo
base_model_id: allenai/OLMo-2-1124-7B-Instruct
seed: 42
exp_name: p1_sdpo_multimodel_trial
# presumably the commit of the training code that produced this run - TODO confirm
git_commit: 0a703a3b9fa4a2fe6be6ab5621e40883fd67118c
dataset: allenai/ai2_arc
dataset_slug: arc_challenge
# Absolute location of this manifest inside the run directory.
manifest_path: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/manifest.yaml
# Labels attached to the run; tags.domain carries the same value as
# hyperparams.dataset.domain.
tags:
  phase: P1
  domain: science
# Settings recorded for the run, one sub-mapping per area (model, dataset,
# sequence, lora, optimization, checkpointing, runtime, sdpo, evaluation,
# prompt_style), plus final_adapter_path and total_steps.
hyperparams:
  model:
    # Repeats the top-level base_model_id value.
    base_model_id: allenai/OLMo-2-1124-7B-Instruct
    model_family: olmo
    # Same seven module names, in the same order, as
    # hyperparams.lora.target_modules.
    # NOTE(review): the list is stored twice; confirm which copy the trainer reads.
    target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
  # Dataset selection. name, domain and slug repeat the top-level dataset,
  # tags.domain and dataset_slug values respectively.
  dataset:
    name: allenai/ai2_arc
    split: train
    text_field: question
    # NOTE(review): eval_samples (256) is larger than max_samples (50);
    # presumably the two limits apply to different pools - confirm.
    max_samples: 50
    eval_samples: 256
    config: ARC-Challenge
    domain: science
    slug: arc_challenge
    format: arc
  # Sequence-length settings. max_length (2048) is larger than
  # sdpo.max_prompt_length + sdpo.max_completion_length (512 + 1024 = 1536).
  sequence:
    max_length: 2048
    packing: true
  # LoRA adapter settings. alpha is twice r (32 vs 16).
  lora:
    r: 16
    alpha: 32
    dropout: 0.05
    # Same seven module names, in the same order, as
    # hyperparams.model.target_modules.
    target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
  # Optimizer and schedule settings: one epoch, cosine schedule, with warmup
  # and weight decay both set to zero.
  optimization:
    num_train_epochs: 1
    # presumably 1 x 2 = 2 samples per optimizer step per device - TODO confirm
    # how the trainer combines these two values.
    per_device_batch_size: 1
    gradient_accumulation_steps: 2
    learning_rate: 5.0e-05
    warmup_ratio: 0.0
    weight_decay: 0.0
    lr_scheduler_type: cosine
    max_grad_norm: 1.0
  # Checkpoint schedule. num_checkpoints (8) equals the number of entries in
  # the top-level checkpoints list, whose steps are 1, 3, 5, 10, 19, 35, 63, 64.
  checkpointing:
    num_checkpoints: 8
    save_total_limit: 64
    schedule: log
    # No fixed save interval is recorded; presumably save points are derived
    # from schedule and num_checkpoints instead - TODO confirm.
    save_steps: null
  # Logging, precision and publishing switches.
  runtime:
    logging_steps: 20
    bf16: true
    gradient_checkpointing: true
    wandb: true
    wandb_project: amr-fma-train
    # hf_org is the owner prefix of the top-level hf_repo_id.
    # NOTE(review): hf_push is true with hf_visibility public, so the pushed
    # artifacts are presumably world-readable - confirm this is intended.
    hf_push: true
    hf_org: tkwiecinski
    hf_visibility: public
    force_restart: false
  # Settings specific to the sdpo part of method lora_sdpo.
  sdpo:
    # NOTE(review): the reward is named gsm8k_match while the dataset is
    # allenai/ai2_arc (ARC-Challenge); confirm this matcher is appropriate for
    # ARC answers or whether the name is simply generic.
    reward: gsm8k_match
    num_generations: 4
    generation_batch_size: 64
    steps_per_generation: null
    # 512 + 1024 = 1536, below sequence.max_length (2048).
    max_prompt_length: 512
    max_completion_length: 1024
    distillation_alpha: 1.0
    distillation_topk: null
    distillation_weight: 1.0
    distillation_is_clip: 2.0
    full_logit_distillation: false
    policy_loss_mode: distillation_only
    # NOTE(review): teacher_regularization is ema but teacher_update_rate is
    # null; presumably a trainer default rate applies - confirm.
    teacher_regularization: ema
    teacher_update_rate: null
    success_reward_threshold: 1.0
    use_successful_as_teacher: true
    # Environment feedback is off and no feedback column is named.
    include_environment_feedback: false
    feedback_column: null
    beta: 0.0
    epsilon: 0.2
    scale_rewards: group
    mask_truncated_completions: true
    dump_rollouts: true
    # NOTE(review): sampling temperature is above 1.0 - confirm intentional.
    temperature: 1.3
  # NOTE(review): evaluation is recorded as disabled here, yet every entry in
  # the top-level checkpoints list carries eval_* metrics - confirm where
  # those metrics come from.
  evaluation:
    enabled: false
    eval_steps: null
    strategy: null
  # Prompt formatting: the system prompt asks for the final answer inside
  # \boxed{}, and style is recorded as boxed.
  prompt_style:
    system_prompt: Please reason step by step, and put your final answer within \boxed{}.
    style: boxed
  # Lives in the same run directory as manifest_path.
  final_adapter_path: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/adapter_final
  # Equals the step of the last entry in the checkpoints list (64).
  total_steps: 64
# One entry per saved checkpoint, listed in increasing step order
# (1, 3, 5, 10, 19, 35, 63, 64).
# In every entry: dir and artifact hold the same path, metadata.source is
# trainer_on_save, and hf_revision is "step-" followed by the step number
# zero-padded to five digits.
# NOTE(review): eval_loss is 0.0 and eval_perplexity is 1.0 in all eight
# entries while hyperparams.evaluation.enabled is false; these look like
# placeholder or degenerate values - confirm before using them for analysis.
# NOTE(review): eval_runtime x eval_samples_per_second is about 40 at step 1
# but about 10 at every later step - confirm the evaluated sample count.
checkpoints:
- step: 1
  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-1
  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-1
  metadata:
    source: trainer_on_save
  metrics:
    eval_loss: 0.0
    eval_runtime: 1737.7203
    eval_samples_per_second: 0.023
    eval_steps_per_second: 0.006
    eval_perplexity: 1.0
  hf_revision: step-00001
  hf_commit: a65627ced5a738a6fbbf63f1239fcff362cc2463
- step: 3
  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-3
  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-3
  metadata:
    source: trainer_on_save
  metrics:
    eval_loss: 0.0
    eval_runtime: 976.0781
    eval_samples_per_second: 0.01
    eval_steps_per_second: 0.003
    eval_perplexity: 1.0
  hf_revision: step-00003
  hf_commit: 8289289634e4cd8ecb860c0149be6605d99403a7
- step: 5
  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-5
  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-5
  metadata:
    source: trainer_on_save
  metrics:
    eval_loss: 0.0
    eval_runtime: 689.7017
    eval_samples_per_second: 0.014
    eval_steps_per_second: 0.004
    eval_perplexity: 1.0
  hf_revision: step-00005
  hf_commit: 5e45883a335d04a20e32e9a1869e09223768b7e4
- step: 10
  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-10
  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-10
  metadata:
    source: trainer_on_save
  metrics:
    eval_loss: 0.0
    eval_runtime: 687.1543
    eval_samples_per_second: 0.015
    eval_steps_per_second: 0.004
    eval_perplexity: 1.0
  hf_revision: step-00010
  hf_commit: ab6969ff56c49251b6d78b50e0933dfe7b125281
- step: 19
  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-19
  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-19
  metadata:
    source: trainer_on_save
  metrics:
    eval_loss: 0.0
    eval_runtime: 639.9336
    eval_samples_per_second: 0.016
    eval_steps_per_second: 0.005
    eval_perplexity: 1.0
  hf_revision: step-00019
  hf_commit: b124ebbca6610e5f52404bf0293564970e095669
- step: 35
  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-35
  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-35
  metadata:
    source: trainer_on_save
  metrics:
    eval_loss: 0.0
    eval_runtime: 727.0752
    eval_samples_per_second: 0.014
    eval_steps_per_second: 0.004
    eval_perplexity: 1.0
  hf_revision: step-00035
  hf_commit: fea3c0826d56d2acaa31c37d9043284fc3498a9b
- step: 63
  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-63
  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-63
  metadata:
    source: trainer_on_save
  metrics:
    eval_loss: 0.0
    eval_runtime: 741.7922
    eval_samples_per_second: 0.013
    eval_steps_per_second: 0.004
    eval_perplexity: 1.0
  hf_revision: step-00063
  hf_commit: deca286dbe3d7475e736048548229a45cfe17105
- step: 64
  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-64
  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-64
  metadata:
    source: trainer_on_save
  metrics:
    eval_loss: 0.0
    eval_runtime: 771.2344
    eval_samples_per_second: 0.013
    eval_steps_per_second: 0.004
    eval_perplexity: 1.0
  hf_revision: step-00064
  hf_commit: 9968e416d14e56c2f4ec3a85d9a56158842e6dee
# External tracking identifiers for the run.
wandb_run_id: 5m88ybj7
# Empty mapping: no evaluation run ids are recorded.
wandb_eval_run_ids: {}
# Owner prefix matches hyperparams.runtime.hf_org; the repository name joins
# "amr-fma", the base model name, method, dataset slug, experiment name and
# "s" + seed with hyphens.
hf_repo_id: tkwiecinski/amr-fma-OLMo-2-1124-7B-Instruct-lora_sdpo-arc_challenge-p1_sdpo_multimodel_trial-s42