rslxcvg committed on
Commit
bb07d02
·
verified ·
1 Parent(s): 9024932

Upload diagnostic checkpoint 010000

Browse files
config.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "molmoact2",
3
+ "n_obs_steps": 1,
4
+ "input_features": {
5
+ "observation.state": {
6
+ "type": "STATE",
7
+ "shape": [
8
+ 6
9
+ ]
10
+ },
11
+ "observation.images.front": {
12
+ "type": "VISUAL",
13
+ "shape": [
14
+ 3,
15
+ 480,
16
+ 640
17
+ ]
18
+ }
19
+ },
20
+ "output_features": {
21
+ "action": {
22
+ "type": "ACTION",
23
+ "shape": [
24
+ 6
25
+ ]
26
+ }
27
+ },
28
+ "device": "cuda",
29
+ "use_amp": false,
30
+ "use_peft": false,
31
+ "push_to_hub": false,
32
+ "repo_id": null,
33
+ "private": null,
34
+ "tags": null,
35
+ "license": null,
36
+ "pretrained_path": null,
37
+ "checkpoint_path": "allenai/MolmoAct2-SO100_101",
38
+ "checkpoint_revision": null,
39
+ "checkpoint_force_download": false,
40
+ "trust_remote_code": true,
41
+ "chunk_size": 30,
42
+ "n_action_steps": 30,
43
+ "action_mode": "continuous",
44
+ "inference_action_mode": null,
45
+ "discrete_action_tokenizer": "allenai/MolmoAct2-FAST-Tokenizer",
46
+ "discrete_generation_max_steps": null,
47
+ "norm_tag": "so100_so101_molmoact2",
48
+ "setup_type": "single so100/so101 robotic arm in molmoact2",
49
+ "control_mode": "absolute joint pose",
50
+ "image_keys": [
51
+ "observation.images.front"
52
+ ],
53
+ "normalize_language": true,
54
+ "add_setup_tokens": true,
55
+ "add_control_tokens": true,
56
+ "normalize_gripper": true,
57
+ "num_state_tokens": 256,
58
+ "max_sequence_length": null,
59
+ "expected_max_action_dim": 32,
60
+ "num_flow_timesteps": 8,
61
+ "flow_matching_cutoff": 1.0,
62
+ "flow_matching_time_offset": 0.001,
63
+ "flow_matching_time_scale": 0.999,
64
+ "flow_matching_beta_alpha": 1.0,
65
+ "flow_matching_beta_beta": 1.5,
66
+ "num_inference_steps": null,
67
+ "mask_action_dim_padding": true,
68
+ "flow_loss_action_dim_weights": [
69
+ 8.0,
70
+ 3.0,
71
+ 3.0,
72
+ 1.0,
73
+ 1.0,
74
+ 1.0
75
+ ],
76
+ "enable_inference_cuda_graph": true,
77
+ "per_episode_seed": false,
78
+ "eval_seed": null,
79
+ "rtc_config": null,
80
+ "enable_lora_vlm": true,
81
+ "lora_rank": 128,
82
+ "lora_alpha": 16,
83
+ "lora_dropout": 0.05,
84
+ "lora_bias": "none",
85
+ "enable_lora_action_expert": true,
86
+ "enable_knowledge_insulation": false,
87
+ "freeze_embedding": true,
88
+ "train_action_expert_only": false,
89
+ "gradient_checkpointing": true,
90
+ "model_dtype": "bfloat16",
91
+ "softmax_auxiliary_loss": true,
92
+ "softmax_auxiliary_loss_scale": 0.0001,
93
+ "discrete_loss_token_weighting": "root_subsegments_root_tokens",
94
+ "optimizer_lr": 1e-05,
95
+ "optimizer_vit_lr": 5e-06,
96
+ "optimizer_connector_lr": 5e-06,
97
+ "optimizer_action_expert_lr": 5e-05,
98
+ "optimizer_betas": [
99
+ 0.9,
100
+ 0.95
101
+ ],
102
+ "optimizer_eps": 1e-06,
103
+ "optimizer_weight_decay": 0.0,
104
+ "optimizer_grad_clip_norm": 1.0,
105
+ "scheduler_warmup_steps": 200,
106
+ "scheduler_decay_steps": null,
107
+ "scheduler_decay_lr": 1e-06,
108
+ "normalization_mapping": {
109
+ "VISUAL": "IDENTITY",
110
+ "STATE": "QUANTILES",
111
+ "ACTION": "QUANTILES"
112
+ },
113
+ "dataset_feature_names": {
114
+ "action": [
115
+ "shoulder_pan.pos",
116
+ "shoulder_lift.pos",
117
+ "elbow_flex.pos",
118
+ "wrist_flex.pos",
119
+ "wrist_roll.pos",
120
+ "gripper.pos"
121
+ ],
122
+ "observation.state": [
123
+ "shoulder_pan.pos",
124
+ "shoulder_lift.pos",
125
+ "elbow_flex.pos",
126
+ "wrist_flex.pos",
127
+ "wrist_roll.pos",
128
+ "gripper.pos"
129
+ ]
130
+ }
131
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfd9e9e8e515caa1eb1f020322477a2ffc4675338f5f073ad0ce596102f052b7
3
+ size 12583264536
policy_postprocessor.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "policy_postprocessor",
3
+ "steps": [
4
+ {
5
+ "registry_name": "molmoact2_clamp_action",
6
+ "config": {}
7
+ },
8
+ {
9
+ "registry_name": "molmoact2_masked_unnormalizer",
10
+ "config": {
11
+ "eps": 1e-08,
12
+ "features": {
13
+ "action": {
14
+ "type": "ACTION",
15
+ "shape": [
16
+ 6
17
+ ]
18
+ }
19
+ },
20
+ "norm_map": {
21
+ "VISUAL": "IDENTITY",
22
+ "STATE": "QUANTILES",
23
+ "ACTION": "QUANTILES"
24
+ }
25
+ },
26
+ "state_file": "policy_postprocessor_step_1_molmoact2_masked_unnormalizer.safetensors"
27
+ },
28
+ {
29
+ "registry_name": "device_processor",
30
+ "config": {
31
+ "device": "cpu",
32
+ "float_dtype": null
33
+ }
34
+ }
35
+ ]
36
+ }
policy_postprocessor_step_1_molmoact2_masked_unnormalizer.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d01a380ee3644aa2b3b38d7162cea8caa9f75f1cb8aeac74ea3a91c9fe37a1
3
+ size 6752
policy_preprocessor.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "policy_preprocessor",
3
+ "steps": [
4
+ {
5
+ "registry_name": "rename_observations_processor",
6
+ "config": {
7
+ "rename_map": {}
8
+ }
9
+ },
10
+ {
11
+ "registry_name": "to_batch_processor",
12
+ "config": {}
13
+ },
14
+ {
15
+ "registry_name": "molmoact2_masked_normalizer",
16
+ "config": {
17
+ "eps": 1e-08,
18
+ "features": {
19
+ "observation.state": {
20
+ "type": "STATE",
21
+ "shape": [
22
+ 6
23
+ ]
24
+ },
25
+ "observation.images.front": {
26
+ "type": "VISUAL",
27
+ "shape": [
28
+ 3,
29
+ 480,
30
+ 640
31
+ ]
32
+ },
33
+ "action": {
34
+ "type": "ACTION",
35
+ "shape": [
36
+ 6
37
+ ]
38
+ }
39
+ },
40
+ "norm_map": {
41
+ "VISUAL": "IDENTITY",
42
+ "STATE": "QUANTILES",
43
+ "ACTION": "QUANTILES"
44
+ }
45
+ },
46
+ "state_file": "policy_preprocessor_step_2_molmoact2_masked_normalizer.safetensors"
47
+ },
48
+ {
49
+ "registry_name": "molmoact2_clamp_normalized",
50
+ "config": {}
51
+ },
52
+ {
53
+ "registry_name": "molmoact2_pack_inputs",
54
+ "config": {
55
+ "checkpoint_path": "allenai/MolmoAct2-SO100_101",
56
+ "checkpoint_revision": null,
57
+ "checkpoint_force_download": false,
58
+ "trust_remote_code": true,
59
+ "action_mode": "continuous",
60
+ "discrete_action_tokenizer": "allenai/MolmoAct2-FAST-Tokenizer",
61
+ "image_keys": [
62
+ "observation.images.front"
63
+ ],
64
+ "setup_type": "single so100/so101 robotic arm in molmoact2",
65
+ "control_mode": "absolute joint pose",
66
+ "normalize_language": true,
67
+ "add_setup_tokens": true,
68
+ "add_control_tokens": true,
69
+ "num_state_tokens": 256,
70
+ "max_sequence_length": null,
71
+ "chunk_size": 30,
72
+ "max_action_dim": 32,
73
+ "env_action_dim": 6
74
+ }
75
+ },
76
+ {
77
+ "registry_name": "device_processor",
78
+ "config": {
79
+ "device": "cuda",
80
+ "float_dtype": null
81
+ }
82
+ }
83
+ ]
84
+ }
policy_preprocessor_step_2_molmoact2_masked_normalizer.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d01a380ee3644aa2b3b38d7162cea8caa9f75f1cb8aeac74ea3a91c9fe37a1
3
+ size 6752
train_config.json ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": {
3
+ "repo_id": "rslxcvg/banana_act_direct_color_simple_v1_molmo_compat",
4
+ "root": "/mnt/vla_picknplace/outputs/lerobot/banana_act_direct_color_simple_v1_molmo_compat",
5
+ "episodes": null,
6
+ "image_transforms": {
7
+ "enable": true,
8
+ "max_num_transforms": 3,
9
+ "random_order": false,
10
+ "tfs": {
11
+ "brightness": {
12
+ "weight": 1.0,
13
+ "type": "ColorJitter",
14
+ "kwargs": {
15
+ "brightness": [
16
+ 0.8,
17
+ 1.2
18
+ ]
19
+ }
20
+ },
21
+ "contrast": {
22
+ "weight": 1.0,
23
+ "type": "ColorJitter",
24
+ "kwargs": {
25
+ "contrast": [
26
+ 0.8,
27
+ 1.2
28
+ ]
29
+ }
30
+ },
31
+ "saturation": {
32
+ "weight": 1.0,
33
+ "type": "ColorJitter",
34
+ "kwargs": {
35
+ "saturation": [
36
+ 0.5,
37
+ 1.5
38
+ ]
39
+ }
40
+ },
41
+ "hue": {
42
+ "weight": 1.0,
43
+ "type": "ColorJitter",
44
+ "kwargs": {
45
+ "hue": [
46
+ -0.05,
47
+ 0.05
48
+ ]
49
+ }
50
+ },
51
+ "sharpness": {
52
+ "weight": 1.0,
53
+ "type": "SharpnessJitter",
54
+ "kwargs": {
55
+ "sharpness": [
56
+ 0.5,
57
+ 1.5
58
+ ]
59
+ }
60
+ },
61
+ "affine": {
62
+ "weight": 1.0,
63
+ "type": "RandomAffine",
64
+ "kwargs": {
65
+ "degrees": [
66
+ -5.0,
67
+ 5.0
68
+ ],
69
+ "translate": [
70
+ 0.05,
71
+ 0.05
72
+ ]
73
+ }
74
+ }
75
+ }
76
+ },
77
+ "revision": null,
78
+ "use_imagenet_stats": true,
79
+ "video_backend": "pyav",
80
+ "return_uint8": false,
81
+ "streaming": false
82
+ },
83
+ "env": null,
84
+ "policy": {
85
+ "type": "molmoact2",
86
+ "n_obs_steps": 1,
87
+ "input_features": {
88
+ "observation.state": {
89
+ "type": "STATE",
90
+ "shape": [
91
+ 6
92
+ ]
93
+ },
94
+ "observation.images.front": {
95
+ "type": "VISUAL",
96
+ "shape": [
97
+ 3,
98
+ 480,
99
+ 640
100
+ ]
101
+ }
102
+ },
103
+ "output_features": {
104
+ "action": {
105
+ "type": "ACTION",
106
+ "shape": [
107
+ 6
108
+ ]
109
+ }
110
+ },
111
+ "device": "cuda",
112
+ "use_amp": false,
113
+ "use_peft": false,
114
+ "push_to_hub": false,
115
+ "repo_id": null,
116
+ "private": null,
117
+ "tags": null,
118
+ "license": null,
119
+ "pretrained_path": null,
120
+ "checkpoint_path": "allenai/MolmoAct2-SO100_101",
121
+ "checkpoint_revision": null,
122
+ "checkpoint_force_download": false,
123
+ "trust_remote_code": true,
124
+ "chunk_size": 30,
125
+ "n_action_steps": 30,
126
+ "action_mode": "continuous",
127
+ "inference_action_mode": null,
128
+ "discrete_action_tokenizer": "allenai/MolmoAct2-FAST-Tokenizer",
129
+ "discrete_generation_max_steps": null,
130
+ "norm_tag": "so100_so101_molmoact2",
131
+ "setup_type": "single so100/so101 robotic arm in molmoact2",
132
+ "control_mode": "absolute joint pose",
133
+ "image_keys": [
134
+ "observation.images.front"
135
+ ],
136
+ "normalize_language": true,
137
+ "add_setup_tokens": true,
138
+ "add_control_tokens": true,
139
+ "normalize_gripper": true,
140
+ "num_state_tokens": 256,
141
+ "max_sequence_length": null,
142
+ "expected_max_action_dim": 32,
143
+ "num_flow_timesteps": 8,
144
+ "flow_matching_cutoff": 1.0,
145
+ "flow_matching_time_offset": 0.001,
146
+ "flow_matching_time_scale": 0.999,
147
+ "flow_matching_beta_alpha": 1.0,
148
+ "flow_matching_beta_beta": 1.5,
149
+ "num_inference_steps": null,
150
+ "mask_action_dim_padding": true,
151
+ "flow_loss_action_dim_weights": [
152
+ 8.0,
153
+ 3.0,
154
+ 3.0,
155
+ 1.0,
156
+ 1.0,
157
+ 1.0
158
+ ],
159
+ "enable_inference_cuda_graph": true,
160
+ "per_episode_seed": false,
161
+ "eval_seed": null,
162
+ "rtc_config": null,
163
+ "enable_lora_vlm": true,
164
+ "lora_rank": 128,
165
+ "lora_alpha": 16,
166
+ "lora_dropout": 0.05,
167
+ "lora_bias": "none",
168
+ "enable_lora_action_expert": true,
169
+ "enable_knowledge_insulation": false,
170
+ "freeze_embedding": true,
171
+ "train_action_expert_only": false,
172
+ "gradient_checkpointing": true,
173
+ "model_dtype": "bfloat16",
174
+ "softmax_auxiliary_loss": true,
175
+ "softmax_auxiliary_loss_scale": 0.0001,
176
+ "discrete_loss_token_weighting": "root_subsegments_root_tokens",
177
+ "optimizer_lr": 1e-05,
178
+ "optimizer_vit_lr": 5e-06,
179
+ "optimizer_connector_lr": 5e-06,
180
+ "optimizer_action_expert_lr": 5e-05,
181
+ "optimizer_betas": [
182
+ 0.9,
183
+ 0.95
184
+ ],
185
+ "optimizer_eps": 1e-06,
186
+ "optimizer_weight_decay": 0.0,
187
+ "optimizer_grad_clip_norm": 1.0,
188
+ "scheduler_warmup_steps": 200,
189
+ "scheduler_decay_steps": null,
190
+ "scheduler_decay_lr": 1e-06,
191
+ "normalization_mapping": {
192
+ "VISUAL": "IDENTITY",
193
+ "STATE": "QUANTILES",
194
+ "ACTION": "QUANTILES"
195
+ },
196
+ "dataset_feature_names": {
197
+ "action": [
198
+ "shoulder_pan.pos",
199
+ "shoulder_lift.pos",
200
+ "elbow_flex.pos",
201
+ "wrist_flex.pos",
202
+ "wrist_roll.pos",
203
+ "gripper.pos"
204
+ ],
205
+ "observation.state": [
206
+ "shoulder_pan.pos",
207
+ "shoulder_lift.pos",
208
+ "elbow_flex.pos",
209
+ "wrist_flex.pos",
210
+ "wrist_roll.pos",
211
+ "gripper.pos"
212
+ ]
213
+ }
214
+ },
215
+ "reward_model": null,
216
+ "output_dir": "/mnt/vla_picknplace/outputs/molmoact2/molmoact2_overnight_frombase_prod_r128_c010_rw3_w8_gpu5_20260519_fullcoverage_v1",
217
+ "job_name": "molmoact2_overnight_frombase_prod_r128_c010_rw3_w8_gpu5_20260519_fullcoverage_v1",
218
+ "resume": false,
219
+ "seed": 1000,
220
+ "cudnn_deterministic": false,
221
+ "num_workers": 4,
222
+ "batch_size": 16,
223
+ "prefetch_factor": 4,
224
+ "persistent_workers": true,
225
+ "steps": 10000,
226
+ "eval_freq": -1,
227
+ "log_freq": 20,
228
+ "tolerance_s": 0.0001,
229
+ "save_checkpoint": true,
230
+ "save_freq": 1000,
231
+ "use_policy_training_preset": true,
232
+ "optimizer": {
233
+ "type": "adamw",
234
+ "lr": 1e-05,
235
+ "weight_decay": 0.0,
236
+ "grad_clip_norm": 1.0,
237
+ "betas": [
238
+ 0.9,
239
+ 0.95
240
+ ],
241
+ "eps": 1e-06
242
+ },
243
+ "scheduler": {
244
+ "type": "molmoact2_cosine_decay_with_warmup",
245
+ "num_warmup_steps": 200,
246
+ "num_decay_steps": null,
247
+ "peak_lr": 1e-05,
248
+ "decay_lr": 1e-06
249
+ },
250
+ "eval": {
251
+ "n_episodes": 50,
252
+ "batch_size": 50,
253
+ "use_async_envs": true
254
+ },
255
+ "wandb": {
256
+ "enable": true,
257
+ "disable_artifact": true,
258
+ "project": "vla_picknplace",
259
+ "entity": null,
260
+ "notes": null,
261
+ "run_id": "we76vk6a",
262
+ "mode": "online",
263
+ "add_tags": true
264
+ },
265
+ "peft": null,
266
+ "sample_weighting": {
267
+ "type": "manifest",
268
+ "progress_path": null,
269
+ "weights_path": "/mnt/vla_picknplace/outputs/molmoact2/decision_manifests/banana_act_direct_color_simple_v1_molmo_compat_manifest.parquet",
270
+ "weight_column": "loss_weight_normalized",
271
+ "index_column": "index",
272
+ "normalize_batch_mean": false,
273
+ "clip_min": 0.25,
274
+ "clip_max": 4.0,
275
+ "head_mode": "sparse",
276
+ "kappa": 0.01,
277
+ "epsilon": 1e-06,
278
+ "extra_params": {}
279
+ },
280
+ "frame_sampling": {
281
+ "type": "manifest",
282
+ "weights_path": "/mnt/vla_picknplace/outputs/molmoact2/decision_manifests/banana_act_direct_color_simple_v1_molmo_compat_manifest.parquet",
283
+ "weight_column": "sample_weight",
284
+ "index_column": "index",
285
+ "replacement": false,
286
+ "num_samples": null
287
+ },
288
+ "prompt_contrast": {
289
+ "type": "same_layout_reference",
290
+ "manifest_path": "/mnt/vla_picknplace/outputs/molmoact2/decision_manifests/banana_act_direct_color_simple_v1_molmo_compat_manifest.parquet",
291
+ "weight": 0.1,
292
+ "max_anchors_per_batch": 1,
293
+ "min_valid_horizon": 30,
294
+ "phase_min": 0.55,
295
+ "phase_max": 0.72,
296
+ "max_reference_phase_delta": 0.03,
297
+ "max_reference_state_l2": null,
298
+ "max_reference_joint_abs": [
299
+ 40.0,
300
+ 25.0,
301
+ 25.0,
302
+ 25.0,
303
+ 30.0,
304
+ 30.0
305
+ ],
306
+ "require_complete_color_set": true,
307
+ "colors": [
308
+ "red",
309
+ "green",
310
+ "blue"
311
+ ],
312
+ "same_noise_across_colors": true,
313
+ "low_t_min": 0.001,
314
+ "low_t_max": 0.1,
315
+ "rank_loss_weight": 3.0,
316
+ "rank_action_index": 5,
317
+ "rank_action_dim": 0,
318
+ "rank_margin": 0.03,
319
+ "rank_temperature": 0.05,
320
+ "rank_min_target_delta": 0.0001,
321
+ "seed": 1000
322
+ },
323
+ "rename_map": {},
324
+ "checkpoint_path": null
325
+ }