# Provenance (from the Hugging Face file view this config was copied from):
#   last commit 1c88cf2 (verified), "Finalize run summary on main",
#   avatar shown: tkwiecinski; file size 2.12 kB.
# Base model selection. `base_model_id` is the value that the
# `${model.base_model_id}` interpolation elsewhere in this config resolves to,
# so these keys must be nested under `model`.
model:
  base_model_id: allenai/OLMo-2-1124-7B-Instruct
  model_family: olmo
  # NOTE(review): presumably the module names that receive adapters for the
  # `lora_sdpo` method — confirm against the training code.
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
# Adapter settings used by the `lora_sdpo` run method.
# NOTE(review): nesting was reconstructed from a flattened copy; this section
# is assumed to be top-level rather than a child of `model` — confirm.
lora:
  r: 16
  alpha: 32
  dropout: 0.05
# Training data source. `domain` is read back through the `${dataset.domain}`
# interpolation, so every key here must live under `dataset` (left flat,
# `name` and `domain` collide with same-named keys of other sections).
dataset:
  name: allenai/ai2_arc
  config: ARC-Challenge
  slug: arc_challenge
  split: train
  text_field: question
  # NOTE(review): eval_samples (256) is larger than max_samples (50);
  # presumably intentional for a trial run — confirm.
  max_samples: 50
  eval_samples: 256
  domain: science
  format: arc
# Prompt formatting.
# NOTE(review): nesting was reconstructed from a flattened copy; this section
# is assumed to be top-level rather than a child of `dataset` — confirm.
prompt_style:
  style: boxed
  # Single-quoted so the backslash and braces are taken literally by every
  # YAML parser; the resulting string is unchanged.
  system_prompt: 'Please reason step by step, and put your final answer within \boxed{}.'
# SDPO-specific settings for the `lora_sdpo` method. Keys set to `null` are
# left unset in this file.
sdpo:
  # NOTE(review): the reward is named `gsm8k_match` while `dataset.format` is
  # `arc` — confirm this reward function scores ARC answers as intended.
  reward: gsm8k_match
  num_generations: 4
  generation_batch_size: 64
  steps_per_generation: null
  max_prompt_length: 512
  max_completion_length: 1024
  mask_truncated_completions: true
  distillation_alpha: 1.0
  distillation_topk: null
  distillation_weight: 1.0
  distillation_is_clip: 2.0
  full_logit_distillation: false
  policy_loss_mode: distillation_only
  teacher_regularization: ema
  teacher_update_rate: null
  success_reward_threshold: 1.0
  use_successful_as_teacher: true
  include_environment_feedback: false
  feedback_column: null
  beta: 0.0
  epsilon: 0.2
  scale_rewards: group
  temperature: 1.3
  dump_rollouts: true
# Optimizer and schedule settings.
optimization:
  num_train_epochs: 1
  per_device_batch_size: 1
  gradient_accumulation_steps: 2
  learning_rate: 5.0e-05
  warmup_ratio: 0.0
  weight_decay: 0.0
  lr_scheduler_type: cosine
  max_grad_norm: 1.0
# Sequence handling.
# NOTE(review): max_length (2048) is at least sdpo.max_prompt_length +
# sdpo.max_completion_length (512 + 1024); how the trainer combines these
# limits is not visible here — confirm.
sequence:
  max_length: 2048
  packing: true
# Checkpoint saving. `save_steps` is null, so no explicit step interval is
# given in this file.
# NOTE(review): presumably save points are derived from `num_checkpoints`
# and `schedule` — confirm against the training code.
checkpointing:
  num_checkpoints: 8
  save_total_limit: 64
  schedule: log
  save_steps: null
# Runtime, logging and publishing switches.
runtime:
  logging_steps: 20
  bf16: true
  gradient_checkpointing: true
  wandb: true
  wandb_project: amr-fma-train
  # NOTE(review): with hf_push enabled and hf_visibility set to public,
  # artifacts are presumably published publicly under `hf_org` — confirm
  # this is intended for a trial run.
  hf_push: true
  hf_org: tkwiecinski
  hf_visibility: public
  force_restart: false
# In-training evaluation is switched off; the step interval and strategy are
# left unset.
evaluation:
  enabled: false
  eval_steps: null
  strategy: null
# Experiment phase label; read back through the `${phase.name}` interpolation,
# so `name` must be nested under `phase`.
phase:
  name: P1
# Run identity. `exp_name` and `seed` are read back through the
# `${run.exp_name}` and `${run.seed}` interpolations, so they must be nested
# under `run`.
run:
  method: lora_sdpo
  exp_name: p1_sdpo_multimodel_trial
  seed: 42
  # NOTE(review): nesting was reconstructed from a flattened copy; `tags` is
  # assumed to belong to `run` rather than being a top-level section — confirm.
  # Its children must be nested either way: left flat, `phase` and `domain`
  # collide with the `phase` section and the dataset's `domain` key.
  tags:
    phase: ${phase.name}
    domain: ${dataset.domain}
# Derived locations and identifiers, built from OmegaConf interpolations.
paths:
  # Uses the AMR_FMA_BASE environment variable when set; otherwise falls back
  # to a scratch directory built from USER. The inner `${oc.env:USER}` has no
  # default, so the fallback cannot resolve if USER is unset.
  base: ${oc.env:AMR_FMA_BASE,/capstor/scratch/cscs/${oc.env:USER}/amr-fma}
  # `hf_last` is not one of OmegaConf's built-in resolvers; it must be
  # registered by the application before this value is resolved.
  # NOTE(review): presumably yields the last path segment of the model id.
  model_short: ${hf_last:${model.base_model_id}}
  run_slug: ${run.exp_name}__s${run.seed}