tkwiecinski
/

amr-fma-OLMo-2-1124-7B-Instruct-lora_sdpo-arc_challenge-p1_sdpo_multimodel_trial-s42

Model card Files Files and versions

amr-fma-OLMo-2-1124-7B-Instruct-lora_sdpo-arc_challenge-p1_sdpo_multimodel_trial-s42 / resolved_config.yaml

tkwiecinski's picture

Finalize run summary on main

1c88cf2 verified 27 days ago

2.12 kB

	# Base model identifier, model family, and the module names listed as targets.
	# `model.base_model_id` is referenced elsewhere in this file via
	# `${model.base_model_id}`.
	model:
	  base_model_id: allenai/OLMo-2-1124-7B-Instruct
	  model_family: olmo
	  # NOTE(review): presumably the projection layers that receive adapters;
	  # confirm against the training code.
	  target_modules:
	    - q_proj
	    - k_proj
	    - v_proj
	    - o_proj
	    - gate_proj
	    - up_proj
	    - down_proj
	# LoRA adapter settings.
	# NOTE(review): the flattened listing does not show whether this section is
	# top-level or nested under `model`; top level is assumed here - confirm.
	lora:
	  r: 16
	  alpha: 32
	  dropout: 0.05
	# Dataset selection. `dataset.domain` is the value picked up by the
	# `${dataset.domain}` interpolation used elsewhere in this file.
	dataset:
	  name: allenai/ai2_arc
	  config: ARC-Challenge
	  slug: arc_challenge
	  split: train
	  text_field: question
	  # NOTE(review): `eval_samples` (256) is larger than `max_samples` (50);
	  # confirm both limits are intended.
	  max_samples: 50
	  eval_samples: 256
	  domain: science
	  format: arc
	# Prompt template settings.
	# NOTE(review): the flattened listing does not show whether this section is
	# top-level or nested under `dataset`; top level is assumed here - confirm.
	prompt_style:
	  style: boxed
	  # Single-quoted so the backslash and braces in `\boxed{}` stay literal.
	  system_prompt: 'Please reason step by step, and put your final answer within \boxed{}.'
	# Settings for the `sdpo` section: generation, distillation, teacher and
	# policy-loss options.
	sdpo:
	  # NOTE(review): the reward is named `gsm8k_match` while the dataset in this
	  # file is ARC-Challenge; confirm this matcher is the intended one.
	  reward: gsm8k_match
	  num_generations: 4
	  generation_batch_size: 64
	  steps_per_generation: null
	  max_prompt_length: 512
	  max_completion_length: 1024
	  mask_truncated_completions: true
	  distillation_alpha: 1.0
	  distillation_topk: null
	  distillation_weight: 1.0
	  distillation_is_clip: 2.0
	  full_logit_distillation: false
	  policy_loss_mode: distillation_only
	  # NOTE(review): `teacher_regularization` is `ema` but `teacher_update_rate`
	  # is null; presumably a trainer default applies - confirm.
	  teacher_regularization: ema
	  teacher_update_rate: null
	  success_reward_threshold: 1.0
	  use_successful_as_teacher: true
	  include_environment_feedback: false
	  feedback_column: null
	  beta: 0.0
	  epsilon: 0.2
	  scale_rewards: group
	  temperature: 1.3
	  dump_rollouts: true
	# Optimizer and schedule settings.
	optimization:
	  num_train_epochs: 1
	  per_device_batch_size: 1
	  gradient_accumulation_steps: 2
	  # Written with a decimal point and a signed exponent, the form that
	  # YAML 1.1 loaders such as PyYAML recognise as a float.
	  learning_rate: 5.0e-05
	  warmup_ratio: 0.0
	  weight_decay: 0.0
	  lr_scheduler_type: cosine
	  max_grad_norm: 1.0
	# Sequence length limit and packing switch.
	sequence:
	  max_length: 2048
	  packing: true
	# Checkpoint count, retention limit and schedule.
	checkpointing:
	  num_checkpoints: 8
	  save_total_limit: 64
	  # NOTE(review): `save_steps` is null while `schedule` is `log`; presumably
	  # the save points are derived from the schedule - confirm.
	  schedule: log
	  save_steps: null
	# Runtime switches: logging interval, precision, experiment tracking and
	# Hugging Face Hub upload options.
	runtime:
	  logging_steps: 20
	  bf16: true
	  gradient_checkpointing: true
	  wandb: true
	  wandb_project: amr-fma-train
	  hf_push: true
	  hf_org: tkwiecinski
	  hf_visibility: public
	  force_restart: false
	# Evaluation settings; `enabled` is false and the remaining options are null.
	evaluation:
	  enabled: false
	  eval_steps: null
	  strategy: null
	# Phase label; `phase.name` is the value picked up by the `${phase.name}`
	# interpolation used elsewhere in this file.
	phase:
	  name: P1
	# Run identity. `run.exp_name` and `run.seed` are referenced elsewhere in
	# this file via `${run.exp_name}` and `${run.seed}`.
	run:
	  method: lora_sdpo
	  exp_name: p1_sdpo_multimodel_trial
	  seed: 42
	# Tags built from other config values through `${...}` interpolations,
	# which appear unexpanded in this file.
	# NOTE(review): the flattened listing does not show whether this section is
	# top-level or nested under `run`; top level is assumed here - confirm.
	tags:
	  phase: ${phase.name}
	  domain: ${dataset.domain}
	# Path components expressed as `${...}` interpolations, which appear
	# unexpanded in this file.
	# NOTE(review): the flattened listing does not show whether this section is
	# top-level or nested under `run`; top level is assumed here - confirm.
	paths:
	  # `oc.env` is OmegaConf's environment-variable resolver; the text after the
	  # comma is the fallback used when AMR_FMA_BASE is unset.
	  base: ${oc.env:AMR_FMA_BASE,/capstor/scratch/cscs/${oc.env:USER}/amr-fma}
	  # NOTE(review): `hf_last` is not an OmegaConf built-in; presumably a
	  # project-registered resolver - confirm.
	  model_short: ${hf_last:${model.base_model_id}}
	  run_slug: ${run.exp_name}__s${run.seed}