tkwiecinski
/

amr-fma-OLMo-2-1124-7B-Instruct-lora_sdpo-arc_challenge-p1_sdpo_multimodel_trial-s42

+---
+library_name: peft
+base_model: allenai/OLMo-2-1124-7B-Instruct
+tags:
+- amr-fma
+- lora_sdpo
+- domain:science
+- phase:P1
+---
+# tkwiecinski/amr-fma-OLMo-2-1124-7B-Instruct-lora_sdpo-arc_challenge-p1_sdpo_multimodel_trial-s42
+This repository holds the LoRA adapter checkpoints produced by an amr-fma training run.
+- **Method**: `lora_sdpo`
+- **Base model**: `allenai/OLMo-2-1124-7B-Instruct`
+- **Dataset**: `allenai/ai2_arc` (slug: `arc_challenge`)
+- **Seed**: `42`
+- **Git commit**: `0a703a3b9fa4a2fe6be6ab5621e40883fd67118c`
+- **Exp name**: `p1_sdpo_multimodel_trial`
+- **WandB run**: `5m88ybj7`
+## Tags
+- phase:P1
+- domain:science
+## Checkpoints (branches)
+- step 1 → revision `step-00001`
+- step 3 → revision `step-00003`
+- step 5 → revision `step-00005`
+- step 10 → revision `step-00010`
+- step 19 → revision `step-00019`
+- step 35 → revision `step-00035`
+- step 63 → revision `step-00063`
+- step 64 → revision `step-00064`
+Pin a specific checkpoint by passing `revision=...` (for example, `revision="step-00010"`) to
+`AutoModelForCausalLM.from_pretrained` / `PeftModel.from_pretrained`.
+## Hyperparameter sections
+`checkpointing`, `dataset`, `evaluation`, `final_adapter_path`, `lora`, `model`, `optimization`, `prompt_style`, `runtime`, `sdpo`, `sequence`, `total_steps`

manifest.yaml ADDED Viewed

	@@ -0,0 +1,214 @@

+method: lora_sdpo
+base_model_id: allenai/OLMo-2-1124-7B-Instruct
+seed: 42
+exp_name: p1_sdpo_multimodel_trial
+git_commit: 0a703a3b9fa4a2fe6be6ab5621e40883fd67118c
+dataset: allenai/ai2_arc
+dataset_slug: arc_challenge
+manifest_path: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/manifest.yaml
+tags:
+  phase: P1
+  domain: science
+hyperparams:
+  model:
+    base_model_id: allenai/OLMo-2-1124-7B-Instruct
+    model_family: olmo
+    target_modules:
+    - q_proj
+    - k_proj
+    - v_proj
+    - o_proj
+    - gate_proj
+    - up_proj
+    - down_proj
+  dataset:
+    name: allenai/ai2_arc
+    split: train
+    text_field: question
+    max_samples: 50
+    eval_samples: 256
+    config: ARC-Challenge
+    domain: science
+    slug: arc_challenge
+    format: arc
+  sequence:
+    max_length: 2048
+    packing: true
+  lora:
+    r: 16
+    alpha: 32
+    dropout: 0.05
+    target_modules:
+    - q_proj
+    - k_proj
+    - v_proj
+    - o_proj
+    - gate_proj
+    - up_proj
+    - down_proj
+  optimization:
+    num_train_epochs: 1
+    per_device_batch_size: 1
+    gradient_accumulation_steps: 2
+    learning_rate: 5.0e-05
+    warmup_ratio: 0.0
+    weight_decay: 0.0
+    lr_scheduler_type: cosine
+    max_grad_norm: 1.0
+  checkpointing:
+    num_checkpoints: 8
+    save_total_limit: 64
+    schedule: log
+    save_steps: null
+  runtime:
+    logging_steps: 20
+    bf16: true
+    gradient_checkpointing: true
+    wandb: true
+    wandb_project: amr-fma-train
+    hf_push: true
+    hf_org: tkwiecinski
+    hf_visibility: public
+    force_restart: false
+  sdpo:
+    reward: gsm8k_match
+    num_generations: 4
+    generation_batch_size: 64
+    steps_per_generation: null
+    max_prompt_length: 512
+    max_completion_length: 1024
+    distillation_alpha: 1.0
+    distillation_topk: null
+    distillation_weight: 1.0
+    distillation_is_clip: 2.0
+    full_logit_distillation: false
+    policy_loss_mode: distillation_only
+    teacher_regularization: ema
+    teacher_update_rate: null
+    success_reward_threshold: 1.0
+    use_successful_as_teacher: true
+    include_environment_feedback: false
+    feedback_column: null
+    beta: 0.0
+    epsilon: 0.2
+    scale_rewards: group
+    mask_truncated_completions: true
+    dump_rollouts: true
+    temperature: 1.3
+  evaluation:
+    enabled: false
+    eval_steps: null
+    strategy: null
+  prompt_style:
+    system_prompt: Please reason step by step, and put your final answer within \boxed{}.
+    style: boxed
+  final_adapter_path: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/adapter_final
+  total_steps: 64
+checkpoints:
+- step: 1
+  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-1
+  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-1
+  metadata:
+    source: trainer_on_save
+  metrics:
+    eval_loss: 0.0
+    eval_runtime: 1737.7203
+    eval_samples_per_second: 0.023
+    eval_steps_per_second: 0.006
+    eval_perplexity: 1.0
+  hf_revision: step-00001
+  hf_commit: a65627ced5a738a6fbbf63f1239fcff362cc2463
+- step: 3
+  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-3
+  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-3
+  metadata:
+    source: trainer_on_save
+  metrics:
+    eval_loss: 0.0
+    eval_runtime: 976.0781
+    eval_samples_per_second: 0.01
+    eval_steps_per_second: 0.003
+    eval_perplexity: 1.0
+  hf_revision: step-00003
+  hf_commit: 8289289634e4cd8ecb860c0149be6605d99403a7
+- step: 5
+  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-5
+  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-5
+  metadata:
+    source: trainer_on_save
+  metrics:
+    eval_loss: 0.0
+    eval_runtime: 689.7017
+    eval_samples_per_second: 0.014
+    eval_steps_per_second: 0.004
+    eval_perplexity: 1.0
+  hf_revision: step-00005
+  hf_commit: 5e45883a335d04a20e32e9a1869e09223768b7e4
+- step: 10
+  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-10
+  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-10
+  metadata:
+    source: trainer_on_save
+  metrics:
+    eval_loss: 0.0
+    eval_runtime: 687.1543
+    eval_samples_per_second: 0.015
+    eval_steps_per_second: 0.004
+    eval_perplexity: 1.0
+  hf_revision: step-00010
+  hf_commit: ab6969ff56c49251b6d78b50e0933dfe7b125281
+- step: 19
+  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-19
+  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-19
+  metadata:
+    source: trainer_on_save
+  metrics:
+    eval_loss: 0.0
+    eval_runtime: 639.9336
+    eval_samples_per_second: 0.016
+    eval_steps_per_second: 0.005
+    eval_perplexity: 1.0
+  hf_revision: step-00019
+  hf_commit: b124ebbca6610e5f52404bf0293564970e095669
+- step: 35
+  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-35
+  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-35
+  metadata:
+    source: trainer_on_save
+  metrics:
+    eval_loss: 0.0
+    eval_runtime: 727.0752
+    eval_samples_per_second: 0.014
+    eval_steps_per_second: 0.004
+    eval_perplexity: 1.0
+  hf_revision: step-00035
+  hf_commit: fea3c0826d56d2acaa31c37d9043284fc3498a9b
+- step: 63
+  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-63
+  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-63
+  metadata:
+    source: trainer_on_save
+  metrics:
+    eval_loss: 0.0
+    eval_runtime: 741.7922
+    eval_samples_per_second: 0.013
+    eval_steps_per_second: 0.004
+    eval_perplexity: 1.0
+  hf_revision: step-00063
+  hf_commit: deca286dbe3d7475e736048548229a45cfe17105
+- step: 64
+  dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-64
+  artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-64
+  metadata:
+    source: trainer_on_save
+  metrics:
+    eval_loss: 0.0
+    eval_runtime: 771.2344
+    eval_samples_per_second: 0.013
+    eval_steps_per_second: 0.004
+    eval_perplexity: 1.0
+  hf_revision: step-00064
+  hf_commit: 9968e416d14e56c2f4ec3a85d9a56158842e6dee
+wandb_run_id: 5m88ybj7
+wandb_eval_run_ids: {}
+hf_repo_id: tkwiecinski/amr-fma-OLMo-2-1124-7B-Instruct-lora_sdpo-arc_challenge-p1_sdpo_multimodel_trial-s42

resolved_config.yaml ADDED Viewed

	@@ -0,0 +1,97 @@

+model:
+  base_model_id: allenai/OLMo-2-1124-7B-Instruct
+  model_family: olmo
+  target_modules:
+  - q_proj
+  - k_proj
+  - v_proj
+  - o_proj
+  - gate_proj
+  - up_proj
+  - down_proj
+lora:
+  r: 16
+  alpha: 32
+  dropout: 0.05
+dataset:
+  name: allenai/ai2_arc
+  config: ARC-Challenge
+  slug: arc_challenge
+  split: train
+  text_field: question
+  max_samples: 50
+  eval_samples: 256
+  domain: science
+  format: arc
+prompt_style:
+  style: boxed
+  system_prompt: Please reason step by step, and put your final answer within \boxed{}.
+sdpo:
+  reward: gsm8k_match
+  num_generations: 4
+  generation_batch_size: 64
+  steps_per_generation: null
+  max_prompt_length: 512
+  max_completion_length: 1024
+  mask_truncated_completions: true
+  distillation_alpha: 1.0
+  distillation_topk: null
+  distillation_weight: 1.0
+  distillation_is_clip: 2.0
+  full_logit_distillation: false
+  policy_loss_mode: distillation_only
+  teacher_regularization: ema
+  teacher_update_rate: null
+  success_reward_threshold: 1.0
+  use_successful_as_teacher: true
+  include_environment_feedback: false
+  feedback_column: null
+  beta: 0.0
+  epsilon: 0.2
+  scale_rewards: group
+  temperature: 1.3
+  dump_rollouts: true
+optimization:
+  num_train_epochs: 1
+  per_device_batch_size: 1
+  gradient_accumulation_steps: 2
+  learning_rate: 5.0e-05
+  warmup_ratio: 0.0
+  weight_decay: 0.0
+  lr_scheduler_type: cosine
+  max_grad_norm: 1.0
+sequence:
+  max_length: 2048
+  packing: true
+checkpointing:
+  num_checkpoints: 8
+  save_total_limit: 64
+  schedule: log
+  save_steps: null
+runtime:
+  logging_steps: 20
+  bf16: true
+  gradient_checkpointing: true
+  wandb: true
+  wandb_project: amr-fma-train
+  hf_push: true
+  hf_org: tkwiecinski
+  hf_visibility: public
+  force_restart: false
+evaluation:
+  enabled: false
+  eval_steps: null
+  strategy: null
+phase:
+  name: P1
+run:
+  method: lora_sdpo
+  exp_name: p1_sdpo_multimodel_trial
+  seed: 42
+  tags:
+    phase: ${phase.name}
+    domain: ${dataset.domain}
+paths:
+  base: ${oc.env:AMR_FMA_BASE,/capstor/scratch/cscs/${oc.env:USER}/amr-fma}
+  model_short: ${hf_last:${model.base_model_id}}
+  run_slug: ${run.exp_name}__s${run.seed}