tkwiecinski committed on
Commit
1c88cf2
·
verified ·
1 Parent(s): c7f293c

Finalize run summary on main

Browse files
Files changed (3) hide show
  1. README.md +41 -0
  2. manifest.yaml +214 -0
  3. resolved_config.yaml +97 -0
README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ base_model: allenai/OLMo-2-1124-7B-Instruct
4
+ tags:
5
+ - amr-fma
6
+ - lora_sdpo
7
+ - domain:science
8
+ - phase:P1
9
+ ---
10
+
11
+ # tkwiecinski/amr-fma-OLMo-2-1124-7B-Instruct-lora_sdpo-arc_challenge-p1_sdpo_multimodel_trial-s42
12
+
13
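+ LoRA (PEFT) adapter produced by an amr-fma training run: `lora_sdpo` on `allenai/OLMo-2-1124-7B-Instruct`, trained on `allenai/ai2_arc` (ARC-Challenge).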
14
+
15
+ - **Method**: `lora_sdpo`
16
+ - **Base model**: `allenai/OLMo-2-1124-7B-Instruct`
17
+ - **Dataset**: `allenai/ai2_arc` (slug: `arc_challenge`)
18
+ - **Seed**: `42`
19
+ - **Git commit**: `0a703a3b9fa4a2fe6be6ab5621e40883fd67118c`
20
+ - **Exp name**: `p1_sdpo_multimodel_trial`
21
+ - **WandB run**: `5m88ybj7`
22
+
23
+ ## Tags
24
+ - phase:P1
25
+ - domain:science
26
+
27
+ ## Checkpoints (branches)
28
+ - step 1 → revision `step-00001`
29
+ - step 3 → revision `step-00003`
30
+ - step 5 → revision `step-00005`
31
+ - step 10 → revision `step-00010`
32
+ - step 19 → revision `step-00019`
33
+ - step 35 → revision `step-00035`
34
+ - step 63 → revision `step-00063`
35
+ - step 64 → revision `step-00064`
36
+
37
+ Pin a specific checkpoint by passing `revision=...` (e.g. `revision="step-00010"`) to
38
+ `AutoModelForCausalLM.from_pretrained` / `PeftModel.from_pretrained`.
39
+
40
+ ## Hyperparameter sections
41
+ `checkpointing`, `dataset`, `evaluation`, `final_adapter_path`, `lora`, `model`, `optimization`, `prompt_style`, `runtime`, `sdpo`, `sequence`, `total_steps`
manifest.yaml ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ method: lora_sdpo
2
+ base_model_id: allenai/OLMo-2-1124-7B-Instruct
3
+ seed: 42
4
+ exp_name: p1_sdpo_multimodel_trial
5
+ git_commit: 0a703a3b9fa4a2fe6be6ab5621e40883fd67118c
6
+ dataset: allenai/ai2_arc
7
+ dataset_slug: arc_challenge
8
+ manifest_path: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/manifest.yaml
9
+ tags:
10
+ phase: P1
11
+ domain: science
12
+ hyperparams:
13
+ model:
14
+ base_model_id: allenai/OLMo-2-1124-7B-Instruct
15
+ model_family: olmo
16
+ target_modules:
17
+ - q_proj
18
+ - k_proj
19
+ - v_proj
20
+ - o_proj
21
+ - gate_proj
22
+ - up_proj
23
+ - down_proj
24
+ dataset:
25
+ name: allenai/ai2_arc
26
+ split: train
27
+ text_field: question
28
+ max_samples: 50
29
+ eval_samples: 256
30
+ config: ARC-Challenge
31
+ domain: science
32
+ slug: arc_challenge
33
+ format: arc
34
+ sequence:
35
+ max_length: 2048
36
+ packing: true
37
+ lora:
38
+ r: 16
39
+ alpha: 32
40
+ dropout: 0.05
41
+ target_modules:
42
+ - q_proj
43
+ - k_proj
44
+ - v_proj
45
+ - o_proj
46
+ - gate_proj
47
+ - up_proj
48
+ - down_proj
49
+ optimization:
50
+ num_train_epochs: 1
51
+ per_device_batch_size: 1
52
+ gradient_accumulation_steps: 2
53
+ learning_rate: 5.0e-05
54
+ warmup_ratio: 0.0
55
+ weight_decay: 0.0
56
+ lr_scheduler_type: cosine
57
+ max_grad_norm: 1.0
58
+ checkpointing:
59
+ num_checkpoints: 8
60
+ save_total_limit: 64
61
+ schedule: log
62
+ save_steps: null
63
+ runtime:
64
+ logging_steps: 20
65
+ bf16: true
66
+ gradient_checkpointing: true
67
+ wandb: true
68
+ wandb_project: amr-fma-train
69
+ hf_push: true
70
+ hf_org: tkwiecinski
71
+ hf_visibility: public
72
+ force_restart: false
73
+ sdpo:
74
+ reward: gsm8k_match
75
+ num_generations: 4
76
+ generation_batch_size: 64
77
+ steps_per_generation: null
78
+ max_prompt_length: 512
79
+ max_completion_length: 1024
80
+ distillation_alpha: 1.0
81
+ distillation_topk: null
82
+ distillation_weight: 1.0
83
+ distillation_is_clip: 2.0
84
+ full_logit_distillation: false
85
+ policy_loss_mode: distillation_only
86
+ teacher_regularization: ema
87
+ teacher_update_rate: null
88
+ success_reward_threshold: 1.0
89
+ use_successful_as_teacher: true
90
+ include_environment_feedback: false
91
+ feedback_column: null
92
+ beta: 0.0
93
+ epsilon: 0.2
94
+ scale_rewards: group
95
+ mask_truncated_completions: true
96
+ dump_rollouts: true
97
+ temperature: 1.3
98
+ evaluation:
99
+ enabled: false
100
+ eval_steps: null
101
+ strategy: null
102
+ prompt_style:
103
+ system_prompt: Please reason step by step, and put your final answer within \boxed{}.
104
+ style: boxed
105
+ final_adapter_path: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/adapter_final
106
+ total_steps: 64
107
+ checkpoints:
108
+ - step: 1
109
+ dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-1
110
+ artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-1
111
+ metadata:
112
+ source: trainer_on_save
113
+ metrics:
114
+ eval_loss: 0.0
115
+ eval_runtime: 1737.7203
116
+ eval_samples_per_second: 0.023
117
+ eval_steps_per_second: 0.006
118
+ eval_perplexity: 1.0
119
+ hf_revision: step-00001
120
+ hf_commit: a65627ced5a738a6fbbf63f1239fcff362cc2463
121
+ - step: 3
122
+ dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-3
123
+ artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-3
124
+ metadata:
125
+ source: trainer_on_save
126
+ metrics:
127
+ eval_loss: 0.0
128
+ eval_runtime: 976.0781
129
+ eval_samples_per_second: 0.01
130
+ eval_steps_per_second: 0.003
131
+ eval_perplexity: 1.0
132
+ hf_revision: step-00003
133
+ hf_commit: 8289289634e4cd8ecb860c0149be6605d99403a7
134
+ - step: 5
135
+ dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-5
136
+ artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-5
137
+ metadata:
138
+ source: trainer_on_save
139
+ metrics:
140
+ eval_loss: 0.0
141
+ eval_runtime: 689.7017
142
+ eval_samples_per_second: 0.014
143
+ eval_steps_per_second: 0.004
144
+ eval_perplexity: 1.0
145
+ hf_revision: step-00005
146
+ hf_commit: 5e45883a335d04a20e32e9a1869e09223768b7e4
147
+ - step: 10
148
+ dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-10
149
+ artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-10
150
+ metadata:
151
+ source: trainer_on_save
152
+ metrics:
153
+ eval_loss: 0.0
154
+ eval_runtime: 687.1543
155
+ eval_samples_per_second: 0.015
156
+ eval_steps_per_second: 0.004
157
+ eval_perplexity: 1.0
158
+ hf_revision: step-00010
159
+ hf_commit: ab6969ff56c49251b6d78b50e0933dfe7b125281
160
+ - step: 19
161
+ dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-19
162
+ artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-19
163
+ metadata:
164
+ source: trainer_on_save
165
+ metrics:
166
+ eval_loss: 0.0
167
+ eval_runtime: 639.9336
168
+ eval_samples_per_second: 0.016
169
+ eval_steps_per_second: 0.005
170
+ eval_perplexity: 1.0
171
+ hf_revision: step-00019
172
+ hf_commit: b124ebbca6610e5f52404bf0293564970e095669
173
+ - step: 35
174
+ dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-35
175
+ artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-35
176
+ metadata:
177
+ source: trainer_on_save
178
+ metrics:
179
+ eval_loss: 0.0
180
+ eval_runtime: 727.0752
181
+ eval_samples_per_second: 0.014
182
+ eval_steps_per_second: 0.004
183
+ eval_perplexity: 1.0
184
+ hf_revision: step-00035
185
+ hf_commit: fea3c0826d56d2acaa31c37d9043284fc3498a9b
186
+ - step: 63
187
+ dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-63
188
+ artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-63
189
+ metadata:
190
+ source: trainer_on_save
191
+ metrics:
192
+ eval_loss: 0.0
193
+ eval_runtime: 741.7922
194
+ eval_samples_per_second: 0.013
195
+ eval_steps_per_second: 0.004
196
+ eval_perplexity: 1.0
197
+ hf_revision: step-00063
198
+ hf_commit: deca286dbe3d7475e736048548229a45cfe17105
199
+ - step: 64
200
+ dir: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-64
201
+ artifact: /capstor/scratch/cscs/tkwiecinski/amr-fma/train/OLMo-2-1124-7B-Instruct/lora_sdpo/arc_challenge/p1_sdpo_multimodel_trial__s42/checkpoint-64
202
+ metadata:
203
+ source: trainer_on_save
204
+ metrics:
205
+ eval_loss: 0.0
206
+ eval_runtime: 771.2344
207
+ eval_samples_per_second: 0.013
208
+ eval_steps_per_second: 0.004
209
+ eval_perplexity: 1.0
210
+ hf_revision: step-00064
211
+ hf_commit: 9968e416d14e56c2f4ec3a85d9a56158842e6dee
212
+ wandb_run_id: 5m88ybj7
213
+ wandb_eval_run_ids: {}
214
+ hf_repo_id: tkwiecinski/amr-fma-OLMo-2-1124-7B-Instruct-lora_sdpo-arc_challenge-p1_sdpo_multimodel_trial-s42
resolved_config.yaml ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_model_id: allenai/OLMo-2-1124-7B-Instruct
3
+ model_family: olmo
4
+ target_modules:
5
+ - q_proj
6
+ - k_proj
7
+ - v_proj
8
+ - o_proj
9
+ - gate_proj
10
+ - up_proj
11
+ - down_proj
12
+ lora:
13
+ r: 16
14
+ alpha: 32
15
+ dropout: 0.05
16
+ dataset:
17
+ name: allenai/ai2_arc
18
+ config: ARC-Challenge
19
+ slug: arc_challenge
20
+ split: train
21
+ text_field: question
22
+ max_samples: 50
23
+ eval_samples: 256
24
+ domain: science
25
+ format: arc
26
+ prompt_style:
27
+ style: boxed
28
+ system_prompt: Please reason step by step, and put your final answer within \boxed{}.
29
+ sdpo:
30
+ reward: gsm8k_match
31
+ num_generations: 4
32
+ generation_batch_size: 64
33
+ steps_per_generation: null
34
+ max_prompt_length: 512
35
+ max_completion_length: 1024
36
+ mask_truncated_completions: true
37
+ distillation_alpha: 1.0
38
+ distillation_topk: null
39
+ distillation_weight: 1.0
40
+ distillation_is_clip: 2.0
41
+ full_logit_distillation: false
42
+ policy_loss_mode: distillation_only
43
+ teacher_regularization: ema
44
+ teacher_update_rate: null
45
+ success_reward_threshold: 1.0
46
+ use_successful_as_teacher: true
47
+ include_environment_feedback: false
48
+ feedback_column: null
49
+ beta: 0.0
50
+ epsilon: 0.2
51
+ scale_rewards: group
52
+ temperature: 1.3
53
+ dump_rollouts: true
54
+ optimization:
55
+ num_train_epochs: 1
56
+ per_device_batch_size: 1
57
+ gradient_accumulation_steps: 2
58
+ learning_rate: 5.0e-05
59
+ warmup_ratio: 0.0
60
+ weight_decay: 0.0
61
+ lr_scheduler_type: cosine
62
+ max_grad_norm: 1.0
63
+ sequence:
64
+ max_length: 2048
65
+ packing: true
66
+ checkpointing:
67
+ num_checkpoints: 8
68
+ save_total_limit: 64
69
+ schedule: log
70
+ save_steps: null
71
+ runtime:
72
+ logging_steps: 20
73
+ bf16: true
74
+ gradient_checkpointing: true
75
+ wandb: true
76
+ wandb_project: amr-fma-train
77
+ hf_push: true
78
+ hf_org: tkwiecinski
79
+ hf_visibility: public
80
+ force_restart: false
81
+ evaluation:
82
+ enabled: false
83
+ eval_steps: null
84
+ strategy: null
85
+ phase:
86
+ name: P1
87
+ run:
88
+ method: lora_sdpo
89
+ exp_name: p1_sdpo_multimodel_trial
90
+ seed: 42
91
+ tags:
92
+ phase: ${phase.name}
93
+ domain: ${dataset.domain}
94
+ paths:
95
+ base: ${oc.env:AMR_FMA_BASE,/capstor/scratch/cscs/${oc.env:USER}/amr-fma}
96
+ model_short: ${hf_last:${model.base_model_id}}
97
+ run_slug: ${run.exp_name}__s${run.seed}