# Base model selection and the module names to adapt.
model:
  # Hub-style "<org>/<name>" identifier; paths.model_short is derived from it
  # through the hf_last resolver.
  base_model_id: allenai/OLMo-2-1124-7B-Instruct
  model_family: olmo
  # Seven projection module names. Presumably the modules that receive LoRA
  # adapters (run.method is lora_sdpo) - confirm against the trainer.
  target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj
# LoRA adapter hyperparameters. alpha is twice r here (32 vs 16).
# Presumably rank / scaling numerator / adapter dropout in the usual LoRA
# sense - confirm against the code that consumes this section.
lora:
  r: 16
  alpha: 32
  dropout: 0.05
# Dataset selection and sampling limits.
dataset:
  name: allenai/ai2_arc
  config: ARC-Challenge
  slug: arc_challenge
  split: train
  text_field: question
  # NOTE(review): eval_samples (256) is larger than max_samples (50), and
  # evaluation.enabled is false in this file - confirm eval_samples is
  # actually used for this run.
  max_samples: 50
  eval_samples: 256
  # Interpolated into run.tags.domain.
  domain: science
  format: arc
# Prompt formatting: the system prompt asks for the final answer in \boxed{}.
prompt_style:
  style: boxed
  # Plain (unquoted) scalar, so the backslash and braces are read literally;
  # no escape processing applies.
  system_prompt: Please reason step by step, and put your final answer within \boxed{}.
# Settings for the SDPO stage (run.method is lora_sdpo). The meaning of each
# key is defined by the consuming trainer, which is not visible in this file;
# notes below are limited to what the values themselves show.
sdpo:
  # NOTE(review): the reward is named gsm8k_match while the dataset is
  # ARC-Challenge (dataset.format: arc) - confirm this matcher scores ARC
  # answers correctly.
  reward: gsm8k_match
  # generation_batch_size (64) is a multiple of num_generations (4);
  # steps_per_generation is left null.
  num_generations: 4
  generation_batch_size: 64
  steps_per_generation: null
  # 512 + 1024 = 1536, which is below sequence.max_length (2048).
  max_prompt_length: 512
  max_completion_length: 1024
  mask_truncated_completions: true
  # Distillation-related settings; policy_loss_mode selects distillation_only.
  distillation_alpha: 1.0
  distillation_topk: null
  distillation_weight: 1.0
  distillation_is_clip: 2.0
  full_logit_distillation: false
  policy_loss_mode: distillation_only
  # NOTE(review): teacher_regularization is ema but teacher_update_rate is
  # null - presumably a trainer-side default rate applies; confirm.
  teacher_regularization: ema
  teacher_update_rate: null
  success_reward_threshold: 1.0
  use_successful_as_teacher: true
  # Environment feedback is disabled and no feedback column is named.
  include_environment_feedback: false
  feedback_column: null
  beta: 0.0
  epsilon: 0.2
  scale_rewards: group
  temperature: 1.3
  dump_rollouts: true
# Optimizer and learning-rate schedule settings.
optimization:
  num_train_epochs: 1
  # Batch size 1 with 2 accumulation steps; presumably 2 samples per device
  # per optimizer step - confirm how the trainer combines these.
  per_device_batch_size: 1
  gradient_accumulation_steps: 2
  # Written with a decimal point and a signed exponent, so YAML 1.1 loaders
  # (e.g. PyYAML) also read it as a float rather than a string.
  learning_rate: 5.0e-05
  # Cosine scheduler with a warmup ratio of 0.0 and no weight decay.
  warmup_ratio: 0.0
  weight_decay: 0.0
  lr_scheduler_type: cosine
  max_grad_norm: 1.0
# Sequence-length settings.
sequence:
  # Larger than sdpo.max_prompt_length + sdpo.max_completion_length (1536).
  max_length: 2048
  # NOTE(review): confirm that packing is honoured (and intended) when
  # run.method is lora_sdpo.
  packing: true
# Checkpoint cadence and retention.
checkpointing:
  # 8 checkpoints requested; the retention limit (64) is higher than that.
  num_checkpoints: 8
  save_total_limit: 64
  # Presumably log-spaced checkpoint steps - confirm against the trainer.
  schedule: log
  # No fixed save interval is set.
  save_steps: null
# Runtime, logging and publishing switches.
runtime:
  logging_steps: 20
  bf16: true
  gradient_checkpointing: true
  wandb: true
  wandb_project: amr-fma-train
  # NOTE(review): hf_push is true with hf_visibility public - presumably
  # artifacts are uploaded publicly under hf_org; confirm this is intended
  # for a run whose exp_name ends in "_trial".
  hf_push: true
  hf_org: tkwiecinski
  hf_visibility: public
  force_restart: false
# Evaluation is switched off for this run; interval and strategy are unset.
evaluation:
  enabled: false
  eval_steps: null
  strategy: null
# Experiment phase label; interpolated into run.tags.phase.
phase:
  name: P1
# Run identity. exp_name and seed are combined into paths.run_slug.
run:
  method: lora_sdpo
  exp_name: p1_sdpo_multimodel_trial
  seed: 42
  # Tag values are interpolations of other keys in this file; with the
  # values here they resolve to phase=P1 and domain=science.
  tags:
    phase: ${phase.name}
    domain: ${dataset.domain}
# Paths and naming. Every value is a ${...} interpolation resolved at load
# time (oc.env is OmegaConf's environment-variable resolver).
paths:
  # Uses the AMR_FMA_BASE environment variable when set; otherwise falls back
  # to a scratch path built from the USER environment variable.
  # NOTE(review): the nested ${oc.env:USER} has no default - presumably
  # resolution fails when USER is unset; confirm for containerised jobs.
  base: ${oc.env:AMR_FMA_BASE,/capstor/scratch/cscs/${oc.env:USER}/amr-fma}
  # hf_last is not a built-in OmegaConf resolver, so the project must register
  # it before this config is resolved. Presumably it returns the last path
  # segment of model.base_model_id - confirm.
  model_short: ${hf_last:${model.base_model_id}}
  # "<exp_name>__s<seed>"; with the values in this file:
  # p1_sdpo_multimodel_trial__s42.
  run_slug: ${run.exp_name}__s${run.seed}