LucaFrat commited on
Commit
f0e5793
·
verified ·
1 Parent(s): 0dd1468

Upload folder using huggingface_hub

Browse files
checkpoint-1000/config.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_horizon": 40,
3
+ "add_pos_embed": true,
4
+ "apply_sincos_state_encoding": false,
5
+ "architectures": [
6
+ "Gr00tN1d7"
7
+ ],
8
+ "attn_dropout": 0.2,
9
+ "attn_implementation": null,
10
+ "backbone_embedding_dim": 2048,
11
+ "backbone_trainable_params_fp32": true,
12
+ "color_jitter_params": {
13
+ "brightness": 0.3,
14
+ "contrast": 0.4,
15
+ "hue": 0.08,
16
+ "saturation": 0.5
17
+ },
18
+ "crop_fraction": 0.95,
19
+ "diffusion_model_cfg": {
20
+ "attention_head_dim": 48,
21
+ "dropout": 0.2,
22
+ "final_dropout": true,
23
+ "interleave_self_attention": true,
24
+ "norm_type": "ada_norm",
25
+ "num_attention_heads": 32,
26
+ "num_layers": 32,
27
+ "output_dim": 1024,
28
+ "positional_embeddings": null
29
+ },
30
+ "dtype": "float32",
31
+ "exclude_state": false,
32
+ "formalize_language": true,
33
+ "hidden_size": 1024,
34
+ "image_crop_size": [
35
+ 230,
36
+ 230
37
+ ],
38
+ "image_target_size": [
39
+ 256,
40
+ 256
41
+ ],
42
+ "letter_box_transform": false,
43
+ "load_bf16": false,
44
+ "max_action_dim": 132,
45
+ "max_num_embodiments": 32,
46
+ "max_seq_len": 1024,
47
+ "max_state_dim": 132,
48
+ "model_dtype": "bfloat16",
49
+ "model_name": "nvidia/Cosmos-Reason2-2B",
50
+ "model_type": "Gr00tN1d7",
51
+ "noise_beta_alpha": 1.5,
52
+ "noise_beta_beta": 1.0,
53
+ "noise_s": 0.999,
54
+ "num_inference_timesteps": 4,
55
+ "num_timestep_buckets": 1000,
56
+ "random_history_crop": true,
57
+ "random_rotation_angle": 0,
58
+ "reproject_vision": false,
59
+ "rtc_ramp_rate": 6.0,
60
+ "select_layer": 16,
61
+ "shortest_image_edge": 256,
62
+ "state_dropout_prob": 0.2,
63
+ "state_gaussian_noise_std": 0.0,
64
+ "transformers_version": "4.57.3",
65
+ "tune_diffusion_model": true,
66
+ "tune_linear": true,
67
+ "tune_llm": false,
68
+ "tune_projector": true,
69
+ "tune_top_llm_layers": 0,
70
+ "tune_visual": false,
71
+ "tune_vlln": true,
72
+ "use_albumentations": true,
73
+ "use_alternate_vl_dit": true,
74
+ "use_flash_attention": true,
75
+ "use_future_tokens": false,
76
+ "use_mean_std": false,
77
+ "use_percentiles": true,
78
+ "use_vl_self_attention": true,
79
+ "use_vlln": true,
80
+ "vl_self_attention_cfg": {
81
+ "attention_head_dim": 64,
82
+ "dropout": 0.2,
83
+ "final_dropout": true,
84
+ "num_attention_heads": 32,
85
+ "num_layers": 4,
86
+ "positional_embeddings": null
87
+ }
88
+ }
checkpoint-1000/embodiment_id.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "oxe_droid": 17,
4
+ "oxe_fractal": 18,
5
+ "oxe_language_table": 19,
6
+ "oxe_bridge": 20,
7
+ "unknown": 22,
8
+ "gr1_unified": 20,
9
+ "agibot": 26,
10
+ "sim_behavior_r1_pro": 23,
11
+ "xdof": 24,
12
+ "xdof_oss_data": 25,
13
+ "unitree_g1_full_body_with_waist_height_nav_cmd": 25,
14
+ "real_r1_pro_sharpa": 27,
15
+ "real_r1_pro_sharpa_add_view": 27,
16
+ "real_r1_pro_sharpa_relative_arm_joint": 26,
17
+ "real_r1_pro_sharpa_delta_eef": 26,
18
+ "real_r1_pro_sharpa_absolute_eef": 26,
19
+ "real_r1_pro_sharpa_meanstd": 26,
20
+ "real_r1_pro_sharpa_relative_eef": 26,
21
+ "real_r1_pro_sharpa_relative_eef_add_view": 26,
22
+ "real_r1_pro_sharpa_relative_eef_relative_hand": 26,
23
+ "real_r1_pro_sharpa_relative_eef_human": 26,
24
+ "real_r1_pro_sharpa_relative_eef_human_add_view": 26,
25
+ "real_r1_pro_sharpa_relative_eef_human_relative_hand": 26,
26
+ "real_r1_pro_sharpa_relative_eef_egodex": 26,
27
+ "real_r1_pro_sharpa_relative_eef_egodex_relative_hand": 26,
28
+ "real_r1_pro_sharpa_relative_eef_egodex_wrist_only": 26,
29
+ "real_r1_pro_sharpa_relative_eef_maxinsights": 26,
30
+ "real_r1_pro_sharpa_relative_eef_maxinsights_relative_hand": 26,
31
+ "real_r1_pro_sharpa_relative_eef_mecka": 26,
32
+ "real_r1_pro_sharpa_relative_eef_mecka_relative_hand": 26,
33
+ "real_g1_relative_eef_absolute_joints": 25,
34
+ "real_g1_relative_eef_absolute_joints_wrist_cam": 25,
35
+ "real_g1_relative_eef_relative_joints": 25,
36
+ "real_r1_pro_sharpa_relative_eef_relative_hand_relative_joint": 26,
37
+ "real_r1_pro_sharpa_relative_joint": 29,
38
+ "oxe_droid_relative_eef_relative_joint": 24,
39
+ "oxe_droid_relative_eef_relative_joint_swapped": 24,
40
+ "oxe_droid_relative_eef_relative_joint_upweight_z": 24,
41
+ "oxe_droid_relative_eef_relative_joint_upweight_z_swapped": 24,
42
+ "oxe_droid_relative_eef_relative_joint_3view": 24,
43
+ "oxe_droid_relative_eef_relative_joint_3view_swapped": 24,
44
+ "oxe_droid_relative_eef": 24,
45
+ "oxe_droid_joint_position_relative": 24,
46
+ "xdof_relative_eef_relative_joint": 27,
47
+ "xdof_relative_eef_relative_joint_subtask": 27,
48
+ "xdof_relative_eef": 27,
49
+ "xdof_relative_joint": 28,
50
+ "simpler_env_google": 0,
51
+ "simpler_env_widowx": 1,
52
+ "libero_sim": 2,
53
+ "droid_sim": 3,
54
+ "unitree_g1_sonic": 11,
55
+ "new_embodiment": 10,
56
+ "robocasa_gr1_tabletop": 10
57
+ }
checkpoint-1000/experiment_cfg/conf.yaml ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ load_config_path: null
2
+ model:
3
+ model_type: Gr00tN1d7
4
+ model_dtype: bfloat16
5
+ model_name: nvidia/Cosmos-Reason2-2B
6
+ backbone_model_type: qwen
7
+ model_revision: null
8
+ tune_top_llm_layers: 0
9
+ backbone_embedding_dim: 2048
10
+ tune_llm: false
11
+ tune_visual: false
12
+ select_layer: 12
13
+ reproject_vision: false
14
+ use_flash_attention: true
15
+ load_bf16: false
16
+ backbone_trainable_params_fp32: true
17
+ image_crop_size:
18
+ - 230
19
+ - 230
20
+ image_target_size:
21
+ - 256
22
+ - 256
23
+ shortest_image_edge: null
24
+ crop_fraction: null
25
+ random_rotation_angle: null
26
+ color_jitter_params:
27
+ brightness: 0.3
28
+ contrast: 0.4
29
+ saturation: 0.5
30
+ hue: 0.08
31
+ use_albumentations_transforms: true
32
+ extra_augmentation_config: null
33
+ formalize_language: true
34
+ apply_sincos_state_encoding: false
35
+ use_percentiles: true
36
+ use_relative_action: true
37
+ max_state_dim: 132
38
+ max_action_dim: 132
39
+ action_horizon: 40
40
+ hidden_size: 1024
41
+ input_embedding_dim: 1536
42
+ state_history_length: 1
43
+ add_pos_embed: true
44
+ attn_dropout: 0.2
45
+ use_vlln: true
46
+ max_seq_len: 1024
47
+ use_alternate_vl_dit: true
48
+ attend_text_every_n_blocks: 2
49
+ diffusion_model_cfg:
50
+ positional_embeddings: null
51
+ num_layers: 16
52
+ num_attention_heads: 32
53
+ attention_head_dim: 48
54
+ norm_type: ada_norm
55
+ dropout: 0.2
56
+ final_dropout: true
57
+ output_dim: 1024
58
+ interleave_self_attention: true
59
+ num_inference_timesteps: 4
60
+ noise_beta_alpha: 1.5
61
+ noise_beta_beta: 1.0
62
+ noise_s: 0.999
63
+ num_timestep_buckets: 1000
64
+ tune_projector: true
65
+ tune_diffusion_model: true
66
+ tune_vlln: true
67
+ state_dropout_prob: 0.2
68
+ exclude_state: false
69
+ use_mean_std: false
70
+ max_num_embodiments: 32
71
+ data:
72
+ datasets:
73
+ - dataset_paths:
74
+ - /home/ubuntu/groot-files/dataset_wbc_train
75
+ embodiment_tag: unitree_g1_sonic
76
+ mix_ratio: 1.0
77
+ dataset_type: physical_embodiment
78
+ val_dataset_path: null
79
+ modality_configs:
80
+ unitree_g1_sonic:
81
+ video:
82
+ delta_indices:
83
+ - 0
84
+ modality_keys:
85
+ - ego_view
86
+ sin_cos_embedding_keys: null
87
+ mean_std_embedding_keys: null
88
+ action_configs: null
89
+ state:
90
+ delta_indices:
91
+ - 0
92
+ modality_keys:
93
+ - left_leg
94
+ - right_leg
95
+ - waist
96
+ - left_arm
97
+ - right_arm
98
+ - left_hand
99
+ - right_hand
100
+ - projected_gravity
101
+ sin_cos_embedding_keys: null
102
+ mean_std_embedding_keys: null
103
+ action_configs: null
104
+ action:
105
+ delta_indices:
106
+ - 0
107
+ - 1
108
+ - 2
109
+ - 3
110
+ - 4
111
+ - 5
112
+ - 6
113
+ - 7
114
+ - 8
115
+ - 9
116
+ - 10
117
+ - 11
118
+ - 12
119
+ - 13
120
+ - 14
121
+ - 15
122
+ - 16
123
+ - 17
124
+ - 18
125
+ - 19
126
+ - 20
127
+ - 21
128
+ - 22
129
+ - 23
130
+ - 24
131
+ - 25
132
+ - 26
133
+ - 27
134
+ - 28
135
+ - 29
136
+ - 30
137
+ - 31
138
+ - 32
139
+ - 33
140
+ - 34
141
+ - 35
142
+ - 36
143
+ - 37
144
+ - 38
145
+ - 39
146
+ modality_keys:
147
+ - motion_token
148
+ - left_hand_joints
149
+ - right_hand_joints
150
+ sin_cos_embedding_keys: null
151
+ mean_std_embedding_keys: null
152
+ action_configs:
153
+ - rep: ABSOLUTE
154
+ type: NON_EEF
155
+ format: DEFAULT
156
+ state_key: null
157
+ - rep: ABSOLUTE
158
+ type: NON_EEF
159
+ format: DEFAULT
160
+ state_key: null
161
+ - rep: ABSOLUTE
162
+ type: NON_EEF
163
+ format: DEFAULT
164
+ state_key: null
165
+ language:
166
+ delta_indices:
167
+ - 0
168
+ modality_keys:
169
+ - annotation.human.task_description
170
+ sin_cos_embedding_keys: null
171
+ mean_std_embedding_keys: null
172
+ action_configs: null
173
+ download_cache: false
174
+ shard_size: 1024
175
+ episode_sampling_rate: 0.1
176
+ num_shards_per_epoch: 100000
177
+ override_pretraining_statistics: true
178
+ mode: single_turn
179
+ random_chop: 0.0
180
+ mock_dataset_mode: false
181
+ shuffle: true
182
+ seed: 42
183
+ multiprocessing_context: fork
184
+ allow_padding: false
185
+ subsample_ratio: 1.0
186
+ image_crop_size:
187
+ - 244
188
+ - 244
189
+ image_target_size:
190
+ - 224
191
+ - 224
192
+ video_backend: torchcodec
193
+ training:
194
+ output_dir: /home/ubuntu/groot-files/checkpoints/run-2026-05-28-090411
195
+ experiment_name: null
196
+ max_steps: 10000
197
+ global_batch_size: 32
198
+ batch_size: null
199
+ gradient_accumulation_steps: 1
200
+ learning_rate: 0.0001
201
+ lr_scheduler_type: cosine
202
+ weight_decay: 1.0e-05
203
+ warmup_ratio: 0.05
204
+ warmup_steps: 0
205
+ max_grad_norm: 1.0
206
+ optim: adamw_torch
207
+ start_from_checkpoint: nvidia/GR00T-N1.7-3B
208
+ skip_weight_loading: false
209
+ tf32: true
210
+ fp16: false
211
+ bf16: true
212
+ eval_bf16: true
213
+ logging_steps: 10
214
+ save_steps: 1000
215
+ save_total_limit: 10
216
+ save_vl_model: false
217
+ save_only_model: false
218
+ upload_checkpoints: false
219
+ upload_every: 1000
220
+ upload_last_n_checkpoints: 5
221
+ max_concurrent_uploads: 2
222
+ eval_strategy: 'no'
223
+ eval_steps: 500
224
+ eval_set_split_ratio: 0.1
225
+ eval_batch_size: 2
226
+ save_best_eval_metric_name: ''
227
+ save_best_eval_metric_greater_is_better: true
228
+ eval_dataset_path: /home/ubuntu/groot-files/dataset_wbc_eval
229
+ eval_num_batches: 50
230
+ deepspeed_stage: 2
231
+ gradient_checkpointing: false
232
+ transformers_trust_remote_code: true
233
+ transformers_local_files_only: false
234
+ transformers_cache_dir: null
235
+ transformers_access_token: null
236
+ use_ddp: false
237
+ ddp_bucket_cap_mb: 100
238
+ num_gpus: 1
239
+ dataloader_num_workers: 6
240
+ remove_unused_columns: false
241
+ use_wandb: true
242
+ wandb_project: groot-wbc
243
+ enable_profiling: false
244
+ max_retries: 3
245
+ assert_loss_less_than: null
246
+ add_rl_callback: false
247
+ enable_open_loop_eval: false
248
+ open_loop_eval_traj_ids:
249
+ - 0
250
+ open_loop_eval_steps_per_traj: 100
251
+ open_loop_eval_plot_indices: null
252
+ max_steps: 10000
253
+ save_steps: 1000
checkpoint-1000/experiment_cfg/config.yaml ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object:gr00t.configs.base_config.Config
2
+ data: !!python/object:gr00t.configs.data.data_config.DataConfig
3
+ allow_padding: false
4
+ datasets:
5
+ - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
6
+ dataset_paths:
7
+ - /home/ubuntu/groot-files/dataset_wbc_train
8
+ dataset_type: physical_embodiment
9
+ embodiment_tag: unitree_g1_sonic
10
+ mix_ratio: 1.0
11
+ val_dataset_path: null
12
+ download_cache: false
13
+ episode_sampling_rate: 0.1
14
+ image_crop_size:
15
+ - 244
16
+ - 244
17
+ image_target_size:
18
+ - 224
19
+ - 224
20
+ mock_dataset_mode: false
21
+ modality_configs:
22
+ unitree_g1_sonic:
23
+ action: !!python/object:gr00t.data.types.ModalityConfig
24
+ action_configs:
25
+ - !!python/object:gr00t.data.types.ActionConfig
26
+ format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
27
+ - default
28
+ rep: &id002 !!python/object/apply:gr00t.data.types.ActionRepresentation
29
+ - absolute
30
+ state_key: null
31
+ type: &id003 !!python/object/apply:gr00t.data.types.ActionType
32
+ - non_eef
33
+ - !!python/object:gr00t.data.types.ActionConfig
34
+ format: *id001
35
+ rep: *id002
36
+ state_key: null
37
+ type: *id003
38
+ - !!python/object:gr00t.data.types.ActionConfig
39
+ format: *id001
40
+ rep: *id002
41
+ state_key: null
42
+ type: *id003
43
+ delta_indices:
44
+ - 0
45
+ - 1
46
+ - 2
47
+ - 3
48
+ - 4
49
+ - 5
50
+ - 6
51
+ - 7
52
+ - 8
53
+ - 9
54
+ - 10
55
+ - 11
56
+ - 12
57
+ - 13
58
+ - 14
59
+ - 15
60
+ - 16
61
+ - 17
62
+ - 18
63
+ - 19
64
+ - 20
65
+ - 21
66
+ - 22
67
+ - 23
68
+ - 24
69
+ - 25
70
+ - 26
71
+ - 27
72
+ - 28
73
+ - 29
74
+ - 30
75
+ - 31
76
+ - 32
77
+ - 33
78
+ - 34
79
+ - 35
80
+ - 36
81
+ - 37
82
+ - 38
83
+ - 39
84
+ mean_std_embedding_keys: null
85
+ modality_keys:
86
+ - motion_token
87
+ - left_hand_joints
88
+ - right_hand_joints
89
+ sin_cos_embedding_keys: null
90
+ language: !!python/object:gr00t.data.types.ModalityConfig
91
+ action_configs: null
92
+ delta_indices:
93
+ - 0
94
+ mean_std_embedding_keys: null
95
+ modality_keys:
96
+ - annotation.human.task_description
97
+ sin_cos_embedding_keys: null
98
+ state: !!python/object:gr00t.data.types.ModalityConfig
99
+ action_configs: null
100
+ delta_indices:
101
+ - 0
102
+ mean_std_embedding_keys: null
103
+ modality_keys:
104
+ - left_leg
105
+ - right_leg
106
+ - waist
107
+ - left_arm
108
+ - right_arm
109
+ - left_hand
110
+ - right_hand
111
+ - projected_gravity
112
+ sin_cos_embedding_keys: null
113
+ video: !!python/object:gr00t.data.types.ModalityConfig
114
+ action_configs: null
115
+ delta_indices:
116
+ - 0
117
+ mean_std_embedding_keys: null
118
+ modality_keys:
119
+ - ego_view
120
+ sin_cos_embedding_keys: null
121
+ mode: single_turn
122
+ multiprocessing_context: fork
123
+ num_shards_per_epoch: 100000
124
+ override_pretraining_statistics: true
125
+ random_chop: 0.0
126
+ seed: 42
127
+ shard_size: 1024
128
+ shuffle: true
129
+ subsample_ratio: 1.0
130
+ video_backend: torchcodec
131
+ load_config_path: null
132
+ model: !!python/object:gr00t.configs.model.gr00t_n1d7.Gr00tN1d7Config
133
+ _attn_implementation_internal: null
134
+ _commit_hash: null
135
+ _name_or_path: ''
136
+ _output_attentions: false
137
+ add_cross_attention: false
138
+ architectures: null
139
+ backbone_trainable_params_fp32: true
140
+ bad_words_ids: null
141
+ begin_suppress_tokens: null
142
+ bos_token_id: null
143
+ chunk_size_feed_forward: 0
144
+ color_jitter_params:
145
+ brightness: 0.3
146
+ contrast: 0.4
147
+ hue: 0.08
148
+ saturation: 0.5
149
+ cross_attention_hidden_size: null
150
+ decoder_start_token_id: null
151
+ diffusion_model_cfg:
152
+ attention_head_dim: 48
153
+ dropout: 0.2
154
+ final_dropout: true
155
+ interleave_self_attention: true
156
+ norm_type: ada_norm
157
+ num_attention_heads: 32
158
+ num_layers: 16
159
+ output_dim: 1024
160
+ positional_embeddings: null
161
+ diversity_penalty: 0.0
162
+ do_sample: false
163
+ dtype: null
164
+ early_stopping: false
165
+ encoder_no_repeat_ngram_size: 0
166
+ eos_token_id: null
167
+ exponential_decay_length_penalty: null
168
+ extra_augmentation_config: null
169
+ finetuning_task: null
170
+ forced_bos_token_id: null
171
+ forced_eos_token_id: null
172
+ id2label:
173
+ 0: LABEL_0
174
+ 1: LABEL_1
175
+ is_decoder: false
176
+ is_encoder_decoder: false
177
+ label2id:
178
+ LABEL_0: 0
179
+ LABEL_1: 1
180
+ length_penalty: 1.0
181
+ load_bf16: false
182
+ max_length: 20
183
+ min_length: 0
184
+ model_name: nvidia/Cosmos-Reason2-2B
185
+ no_repeat_ngram_size: 0
186
+ num_beam_groups: 1
187
+ num_beams: 1
188
+ num_return_sequences: 1
189
+ output_hidden_states: false
190
+ output_scores: false
191
+ pad_token_id: null
192
+ prefix: null
193
+ problem_type: null
194
+ pruned_heads: {}
195
+ random_rotation_angle: null
196
+ remove_invalid_values: false
197
+ repetition_penalty: 1.0
198
+ reproject_vision: false
199
+ return_dict: true
200
+ return_dict_in_generate: false
201
+ sep_token_id: null
202
+ state_dropout_prob: 0.2
203
+ suppress_tokens: null
204
+ task_specific_params: null
205
+ temperature: 1.0
206
+ tf_legacy_loss: false
207
+ tie_encoder_decoder: false
208
+ tie_word_embeddings: true
209
+ tokenizer_class: null
210
+ top_k: 50
211
+ top_p: 1.0
212
+ torchscript: false
213
+ transformers_version: null
214
+ tune_diffusion_model: true
215
+ tune_llm: false
216
+ tune_projector: true
217
+ tune_visual: false
218
+ typical_p: 1.0
219
+ use_bfloat16: false
220
+ use_relative_action: true
221
+ training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
222
+ add_rl_callback: false
223
+ assert_loss_less_than: null
224
+ batch_size: null
225
+ bf16: true
226
+ dataloader_num_workers: 6
227
+ ddp_bucket_cap_mb: 100
228
+ deepspeed_stage: 2
229
+ enable_open_loop_eval: false
230
+ enable_profiling: false
231
+ eval_batch_size: 2
232
+ eval_bf16: true
233
+ eval_dataset_path: /home/ubuntu/groot-files/dataset_wbc_eval
234
+ eval_num_batches: 50
235
+ eval_set_split_ratio: 0.1
236
+ eval_steps: 500
237
+ eval_strategy: 'no'
238
+ experiment_name: null
239
+ fp16: false
240
+ global_batch_size: 32
241
+ gradient_accumulation_steps: 1
242
+ gradient_checkpointing: false
243
+ learning_rate: 0.0001
244
+ logging_steps: 10
245
+ lr_scheduler_type: cosine
246
+ max_concurrent_uploads: 2
247
+ max_grad_norm: 1.0
248
+ max_retries: 3
249
+ max_steps: 10000
250
+ num_gpus: 1
251
+ open_loop_eval_plot_indices: null
252
+ open_loop_eval_steps_per_traj: 100
253
+ open_loop_eval_traj_ids:
254
+ - 0
255
+ optim: adamw_torch
256
+ output_dir: /home/ubuntu/groot-files/checkpoints/run-2026-05-28-090411
257
+ remove_unused_columns: false
258
+ save_best_eval_metric_greater_is_better: true
259
+ save_best_eval_metric_name: ''
260
+ save_only_model: false
261
+ save_steps: 1000
262
+ save_total_limit: 10
263
+ save_vl_model: false
264
+ skip_weight_loading: false
265
+ start_from_checkpoint: nvidia/GR00T-N1.7-3B
266
+ tf32: true
267
+ transformers_access_token: null
268
+ transformers_cache_dir: null
269
+ transformers_local_files_only: false
270
+ transformers_trust_remote_code: true
271
+ upload_checkpoints: false
272
+ upload_every: 1000
273
+ upload_last_n_checkpoints: 5
274
+ use_ddp: false
275
+ use_wandb: true
276
+ wandb_project: groot-wbc
277
+ warmup_ratio: 0.05
278
+ warmup_steps: 0
279
+ weight_decay: 1.0e-05
checkpoint-1000/experiment_cfg/dataset_statistics.json ADDED
@@ -0,0 +1,907 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "unitree_g1_sonic": {
3
+ "state": {
4
+ "left_leg": {
5
+ "min": [
6
+ -0.5145056843757629,
7
+ -0.18529635667800903,
8
+ -0.5630622506141663,
9
+ 0.10770249366760254,
10
+ -0.6103389859199524,
11
+ -0.21104247868061066
12
+ ],
13
+ "max": [
14
+ 0.4081326425075531,
15
+ 0.4657762050628662,
16
+ 0.9445304274559021,
17
+ 1.6440609693527222,
18
+ 0.2812706232070923,
19
+ 0.24254725873470306
20
+ ],
21
+ "mean": [
22
+ -0.012148142792284489,
23
+ 0.049336664378643036,
24
+ 0.22622817754745483,
25
+ 0.3236576318740845,
26
+ -0.1981654316186905,
27
+ -0.0009371974156238139
28
+ ],
29
+ "std": [
30
+ 0.15010829269886017,
31
+ 0.09162046015262604,
32
+ 0.13689051568508148,
33
+ 0.18744750320911407,
34
+ 0.0900125801563263,
35
+ 0.07028596103191376
36
+ ],
37
+ "q01": [
38
+ -0.3930038321018219,
39
+ -0.13217630386352539,
40
+ -0.10907109707593918,
41
+ 0.13054380238056182,
42
+ -0.45879220962524414,
43
+ -0.152383496761322
44
+ ],
45
+ "q99": [
46
+ 0.23233672738075234,
47
+ 0.3159625411033626,
48
+ 0.5810866618156422,
49
+ 1.0221275281906117,
50
+ 0.03577460661530483,
51
+ 0.16769260168075562
52
+ ]
53
+ },
54
+ "right_leg": {
55
+ "min": [
56
+ -0.777660071849823,
57
+ -0.49602121114730835,
58
+ -1.0866719484329224,
59
+ 0.07200334966182709,
60
+ -0.623672366142273,
61
+ -0.23122255504131317
62
+ ],
63
+ "max": [
64
+ 0.36446529626846313,
65
+ 0.15863065421581268,
66
+ 0.29946956038475037,
67
+ 2.0259454250335693,
68
+ 0.24596133828163147,
69
+ 0.18925869464874268
70
+ ],
71
+ "mean": [
72
+ 0.010633867233991623,
73
+ -0.07383165508508682,
74
+ -0.21287542581558228,
75
+ 0.3419981598854065,
76
+ -0.22440023720264435,
77
+ 0.013267790898680687
78
+ ],
79
+ "std": [
80
+ 0.1268979012966156,
81
+ 0.08248293399810791,
82
+ 0.1612742692232132,
83
+ 0.20557346940040588,
84
+ 0.10104448348283768,
85
+ 0.055336352437734604
86
+ ],
87
+ "q01": [
88
+ -0.3428873121738434,
89
+ -0.3137379336357117,
90
+ -0.702793300151825,
91
+ 0.14524445354938506,
92
+ -0.49535272479057313,
93
+ -0.1368582433462143
94
+ ],
95
+ "q99": [
96
+ 0.22101355910301154,
97
+ 0.0916959154605865,
98
+ 0.13021636724472033,
99
+ 1.031281275749206,
100
+ 0.004563198033720189,
101
+ 0.11666926741600031
102
+ ]
103
+ },
104
+ "waist": {
105
+ "min": [
106
+ -0.37403565645217896,
107
+ -0.243158221244812,
108
+ -0.05312114581465721
109
+ ],
110
+ "max": [
111
+ 0.2474018782377243,
112
+ 0.17617863416671753,
113
+ 0.04943050444126129
114
+ ],
115
+ "mean": [
116
+ -0.03901316970586777,
117
+ -0.02152136340737343,
118
+ 0.0024535013362765312
119
+ ],
120
+ "std": [
121
+ 0.07381759583950043,
122
+ 0.046153053641319275,
123
+ 0.008072787895798683
124
+ ],
125
+ "q01": [
126
+ -0.2433778864145279,
127
+ -0.13698728382587433,
128
+ -0.01672225959599018
129
+ ],
130
+ "q99": [
131
+ 0.15031063556671143,
132
+ 0.08901623487472521,
133
+ 0.023261465877294534
134
+ ]
135
+ },
136
+ "left_arm": {
137
+ "min": [
138
+ -0.8068499565124512,
139
+ -0.01750895380973816,
140
+ -0.7458622455596924,
141
+ -0.5916731953620911,
142
+ -1.302229881286621,
143
+ -0.2953871786594391,
144
+ -0.9342422485351562
145
+ ],
146
+ "max": [
147
+ 0.4923958480358124,
148
+ 0.540572464466095,
149
+ 0.35323503613471985,
150
+ 1.320529818534851,
151
+ 1.2396363019943237,
152
+ 0.5774239301681519,
153
+ 0.6715000867843628
154
+ ],
155
+ "mean": [
156
+ -0.10333303362131119,
157
+ 0.189546599984169,
158
+ -0.12471432983875275,
159
+ 0.28616011142730713,
160
+ -0.3353458344936371,
161
+ 0.15808047354221344,
162
+ -0.03580402210354805
163
+ ],
164
+ "std": [
165
+ 0.2009865939617157,
166
+ 0.07324191927909851,
167
+ 0.1315729022026062,
168
+ 0.47473788261413574,
169
+ 0.2912229895591736,
170
+ 0.12051952630281448,
171
+ 0.1922927051782608
172
+ ],
173
+ "q01": [
174
+ -0.545279369354248,
175
+ 0.02649999812245369,
176
+ -0.463615984916687,
177
+ -0.338695775270462,
178
+ -0.9348985409736633,
179
+ -0.12025019407272339,
180
+ -0.5021582007408142
181
+ ],
182
+ "q99": [
183
+ 0.2839887523651115,
184
+ 0.3847482764720913,
185
+ 0.16759651601314537,
186
+ 1.2634340524673462,
187
+ 0.6452915716171259,
188
+ 0.4740810847282409,
189
+ 0.40644835114479044
190
+ ]
191
+ },
192
+ "right_arm": {
193
+ "min": [
194
+ -0.913030207157135,
195
+ -0.6399815678596497,
196
+ -0.3427368700504303,
197
+ -0.39627039432525635,
198
+ -1.251069188117981,
199
+ -0.2754094898700714,
200
+ -0.8207756280899048
201
+ ],
202
+ "max": [
203
+ 0.45103830099105835,
204
+ -0.01125318743288517,
205
+ 0.6279014945030212,
206
+ 1.2988982200622559,
207
+ 1.0332199335098267,
208
+ 0.5550373792648315,
209
+ 0.9986335039138794
210
+ ],
211
+ "mean": [
212
+ -0.0982118472456932,
213
+ -0.20830810070037842,
214
+ 0.08582146465778351,
215
+ 0.22871090471744537,
216
+ 0.1546783298254013,
217
+ 0.14763931930065155,
218
+ -0.04254811629652977
219
+ ],
220
+ "std": [
221
+ 0.22019562125205994,
222
+ 0.06811994314193726,
223
+ 0.1300000548362732,
224
+ 0.46189504861831665,
225
+ 0.2503209114074707,
226
+ 0.10672769695520401,
227
+ 0.18367928266525269
228
+ ],
229
+ "q01": [
230
+ -0.6061021685600281,
231
+ -0.39821806669235227,
232
+ -0.21507561206817627,
233
+ -0.3157608461380005,
234
+ -0.7030838251113891,
235
+ -0.12177794367074966,
236
+ -0.5326336216926575
237
+ ],
238
+ "q99": [
239
+ 0.32033881783485385,
240
+ -0.07859542340040213,
241
+ 0.34321335792541496,
242
+ 1.2582597732543945,
243
+ 0.6935366868972775,
244
+ 0.41980740427970886,
245
+ 0.3515860009193411
246
+ ]
247
+ },
248
+ "left_hand": {
249
+ "min": [
250
+ 0.0,
251
+ 0.0,
252
+ 0.0,
253
+ 0.0,
254
+ 0.0,
255
+ 0.0,
256
+ 0.0
257
+ ],
258
+ "max": [
259
+ 0.0,
260
+ 0.0,
261
+ 0.0,
262
+ 0.0,
263
+ 0.0,
264
+ 0.0,
265
+ 0.0
266
+ ],
267
+ "mean": [
268
+ 0.0,
269
+ 0.0,
270
+ 0.0,
271
+ 0.0,
272
+ 0.0,
273
+ 0.0,
274
+ 0.0
275
+ ],
276
+ "std": [
277
+ 0.0,
278
+ 0.0,
279
+ 0.0,
280
+ 0.0,
281
+ 0.0,
282
+ 0.0,
283
+ 0.0
284
+ ],
285
+ "q01": [
286
+ 0.0,
287
+ 0.0,
288
+ 0.0,
289
+ 0.0,
290
+ 0.0,
291
+ 0.0,
292
+ 0.0
293
+ ],
294
+ "q99": [
295
+ 0.0,
296
+ 0.0,
297
+ 0.0,
298
+ 0.0,
299
+ 0.0,
300
+ 0.0,
301
+ 0.0
302
+ ]
303
+ },
304
+ "right_hand": {
305
+ "min": [
306
+ 0.0,
307
+ 0.0,
308
+ 0.0,
309
+ 0.0,
310
+ 0.0,
311
+ 0.0,
312
+ 0.0
313
+ ],
314
+ "max": [
315
+ 0.0,
316
+ 0.0,
317
+ 0.0,
318
+ 0.0,
319
+ 0.0,
320
+ 0.0,
321
+ 0.0
322
+ ],
323
+ "mean": [
324
+ 0.0,
325
+ 0.0,
326
+ 0.0,
327
+ 0.0,
328
+ 0.0,
329
+ 0.0,
330
+ 0.0
331
+ ],
332
+ "std": [
333
+ 0.0,
334
+ 0.0,
335
+ 0.0,
336
+ 0.0,
337
+ 0.0,
338
+ 0.0,
339
+ 0.0
340
+ ],
341
+ "q01": [
342
+ 0.0,
343
+ 0.0,
344
+ 0.0,
345
+ 0.0,
346
+ 0.0,
347
+ 0.0,
348
+ 0.0
349
+ ],
350
+ "q99": [
351
+ 0.0,
352
+ 0.0,
353
+ 0.0,
354
+ 0.0,
355
+ 0.0,
356
+ 0.0,
357
+ 0.0
358
+ ]
359
+ },
360
+ "projected_gravity": {
361
+ "min": [
362
+ -0.2364102452993393,
363
+ -0.14225876331329346,
364
+ -0.9999999403953552
365
+ ],
366
+ "max": [
367
+ 0.1328846514225006,
368
+ 0.1775456815958023,
369
+ -0.9715155363082886
370
+ ],
371
+ "mean": [
372
+ -0.04713863134384155,
373
+ -0.00449190242215991,
374
+ -0.997578501701355
375
+ ],
376
+ "std": [
377
+ 0.044011253863573074,
378
+ 0.0352567620575428,
379
+ 0.0029932048637504607
380
+ ],
381
+ "q01": [
382
+ -0.16278759896755218,
383
+ -0.08589273869991303,
384
+ -0.9999748921394348
385
+ ],
386
+ "q99": [
387
+ 0.05313032269477822,
388
+ 0.0724137070775032,
389
+ -0.9858456301689148
390
+ ]
391
+ }
392
+ },
393
+ "action": {
394
+ "motion_token": {
395
+ "min": [
396
+ -0.3125,
397
+ -0.4375,
398
+ -0.5,
399
+ -0.5,
400
+ -0.5625,
401
+ -0.25,
402
+ -0.375,
403
+ -0.1875,
404
+ -0.1875,
405
+ -0.5,
406
+ -0.375,
407
+ -0.4375,
408
+ -0.3125,
409
+ -0.4375,
410
+ -0.5,
411
+ -0.5,
412
+ -0.1875,
413
+ -0.3125,
414
+ -0.4375,
415
+ -0.5625,
416
+ -0.4375,
417
+ -0.5625,
418
+ -0.375,
419
+ -0.5,
420
+ -0.3125,
421
+ -0.375,
422
+ -0.375,
423
+ -0.5,
424
+ -0.25,
425
+ -0.25,
426
+ -0.1875,
427
+ -0.375,
428
+ -0.5,
429
+ -0.25,
430
+ -0.4375,
431
+ -0.5,
432
+ -0.5625,
433
+ -0.375,
434
+ -0.125,
435
+ -0.25,
436
+ -0.375,
437
+ -0.5,
438
+ -0.375,
439
+ -0.5,
440
+ -0.5,
441
+ -0.5625,
442
+ -0.4375,
443
+ -0.375,
444
+ -0.3125,
445
+ -0.4375,
446
+ -0.3125,
447
+ -0.4375,
448
+ -0.125,
449
+ -0.5625,
450
+ -0.375,
451
+ -0.25,
452
+ -0.4375,
453
+ -0.5,
454
+ -0.1875,
455
+ -0.5,
456
+ -0.375,
457
+ -0.375,
458
+ -0.375,
459
+ -0.375
460
+ ],
461
+ "max": [
462
+ 0.25,
463
+ 0.25,
464
+ 0.1875,
465
+ 0.125,
466
+ 0.25,
467
+ 0.4375,
468
+ 0.375,
469
+ 0.4375,
470
+ 0.4375,
471
+ 0.375,
472
+ 0.375,
473
+ 0.25,
474
+ 0.4375,
475
+ 0.1875,
476
+ 0.125,
477
+ 0.375,
478
+ 0.375,
479
+ 0.25,
480
+ 0.375,
481
+ 0.3125,
482
+ 0.3125,
483
+ 0.0625,
484
+ 0.3125,
485
+ 0.1875,
486
+ 0.3125,
487
+ 0.4375,
488
+ 0.3125,
489
+ 0.375,
490
+ 0.375,
491
+ 0.375,
492
+ 0.4375,
493
+ 0.375,
494
+ 0.5,
495
+ 0.3125,
496
+ 0.25,
497
+ 0.375,
498
+ 0.4375,
499
+ 0.1875,
500
+ 0.375,
501
+ 0.25,
502
+ 0.1875,
503
+ 0.4375,
504
+ 0.3125,
505
+ 0.25,
506
+ 0.375,
507
+ 0.25,
508
+ 0.25,
509
+ 0.25,
510
+ 0.375,
511
+ 0.25,
512
+ 0.375,
513
+ 0.4375,
514
+ 0.5625,
515
+ 0.3125,
516
+ 0.375,
517
+ 0.375,
518
+ 0.4375,
519
+ 0.5,
520
+ 0.375,
521
+ 0.5,
522
+ 0.4375,
523
+ 0.375,
524
+ 0.375,
525
+ 0.375
526
+ ],
527
+ "mean": [
528
+ -0.06439097970724106,
529
+ -0.06521879881620407,
530
+ -0.125,
531
+ -0.1847969889640808,
532
+ -0.1373879760503769,
533
+ 0.04822782054543495,
534
+ 0.027975188568234444,
535
+ 0.15312707424163818,
536
+ 0.11575789749622345,
537
+ -0.07767443358898163,
538
+ 0.03542105108499527,
539
+ -0.05307518690824509,
540
+ -0.06384360790252686,
541
+ -0.18314436078071594,
542
+ -0.19289849698543549,
543
+ -0.03389473631978035,
544
+ 0.09458722174167633,
545
+ -0.04276691749691963,
546
+ -0.03826015070080757,
547
+ -0.10829849541187286,
548
+ -0.071824811398983,
549
+ -0.22079698741436005,
550
+ -0.04714285582304001,
551
+ -0.22834135591983795,
552
+ -0.01947067677974701,
553
+ 0.0074248118326067924,
554
+ 0.00573834590613842,
555
+ -0.060690224170684814,
556
+ 0.06471353024244308,
557
+ 0.06146090105175972,
558
+ 0.16022330522537231,
559
+ -0.01657293178141117,
560
+ 0.0007180451066233218,
561
+ 0.10714511573314667,
562
+ -0.07185939699411392,
563
+ 0.012763909995555878,
564
+ -0.09171729534864426,
565
+ -0.0794999971985817,
566
+ 0.15587669610977173,
567
+ -0.011427067220211029,
568
+ -0.07081428915262222,
569
+ 0.0370473675429821,
570
+ -0.020825562998652458,
571
+ -0.10491052269935608,
572
+ -0.07982255518436432,
573
+ -0.03099849633872509,
574
+ -0.0554308257997036,
575
+ -0.12368496507406235,
576
+ 0.07534285634756088,
577
+ -0.03026992455124855,
578
+ 0.061053384095430374,
579
+ -0.0092924814671278,
580
+ 0.17143608629703522,
581
+ -0.051742855459451675,
582
+ -0.017972933128476143,
583
+ 0.07894811779260635,
584
+ -0.0036270676646381617,
585
+ -0.0006766917067579925,
586
+ 0.07701954990625381,
587
+ -0.003338345792144537,
588
+ 0.10077368468046188,
589
+ 0.048967670649290085,
590
+ 0.049851126968860626,
591
+ 0.04476090148091316
592
+ ],
593
+ "std": [
594
+ 0.07011081278324127,
595
+ 0.10070349276065826,
596
+ 0.10618823021650314,
597
+ 0.1040232703089714,
598
+ 0.1346515566110611,
599
+ 0.12035326659679413,
600
+ 0.17316193878650665,
601
+ 0.08347848802804947,
602
+ 0.0856991782784462,
603
+ 0.14361310005187988,
604
+ 0.07408461719751358,
605
+ 0.14225201308727264,
606
+ 0.08691011369228363,
607
+ 0.09022863209247589,
608
+ 0.08300687372684479,
609
+ 0.10174085199832916,
610
+ 0.09679042547941208,
611
+ 0.06325257569551468,
612
+ 0.08858565986156464,
613
+ 0.1711788922548294,
614
+ 0.1577969342470169,
615
+ 0.10648369044065475,
616
+ 0.1129717081785202,
617
+ 0.1149425283074379,
618
+ 0.08891275525093079,
619
+ 0.12593857944011688,
620
+ 0.09133600443601608,
621
+ 0.19993668794631958,
622
+ 0.08262906223535538,
623
+ 0.11261534690856934,
624
+ 0.08883041143417358,
625
+ 0.1162080317735672,
626
+ 0.20441272854804993,
627
+ 0.06782399863004684,
628
+ 0.09499737620353699,
629
+ 0.19293878972530365,
630
+ 0.1464645266532898,
631
+ 0.09174318611621857,
632
+ 0.07088606059551239,
633
+ 0.07447929680347443,
634
+ 0.0700845718383789,
635
+ 0.2343086153268814,
636
+ 0.10589630156755447,
637
+ 0.1032341793179512,
638
+ 0.1765136867761612,
639
+ 0.10248377919197083,
640
+ 0.12065689265727997,
641
+ 0.07768698036670685,
642
+ 0.09366670250892639,
643
+ 0.09704228490591049,
644
+ 0.10575727373361588,
645
+ 0.12160521745681763,
646
+ 0.08417858183383942,
647
+ 0.12966477870941162,
648
+ 0.1364697813987732,
649
+ 0.1211245134472847,
650
+ 0.15620125830173492,
651
+ 0.22729310393333435,
652
+ 0.09351138025522232,
653
+ 0.22351068258285522,
654
+ 0.13720352947711945,
655
+ 0.09697328507900238,
656
+ 0.12150005251169205,
657
+ 0.09913279861211777
658
+ ],
659
+ "q01": [
660
+ -0.1875,
661
+ -0.3125,
662
+ -0.4375,
663
+ -0.4375,
664
+ -0.4375,
665
+ -0.1875,
666
+ -0.25,
667
+ -0.0625,
668
+ -0.125,
669
+ -0.4375,
670
+ -0.1875,
671
+ -0.375,
672
+ -0.1875,
673
+ -0.375,
674
+ -0.375,
675
+ -0.3125,
676
+ -0.125,
677
+ -0.1875,
678
+ -0.25,
679
+ -0.4375,
680
+ -0.375,
681
+ -0.4375,
682
+ -0.3125,
683
+ -0.4375,
684
+ -0.1875,
685
+ -0.25,
686
+ -0.1875,
687
+ -0.5,
688
+ -0.125,
689
+ -0.125,
690
+ -0.0625,
691
+ -0.25,
692
+ -0.375,
693
+ -0.0625,
694
+ -0.3125,
695
+ -0.4375,
696
+ -0.4375,
697
+ -0.25,
698
+ 0.0,
699
+ -0.1875,
700
+ -0.25,
701
+ -0.5,
702
+ -0.25,
703
+ -0.375,
704
+ -0.4375,
705
+ -0.375,
706
+ -0.3125,
707
+ -0.25,
708
+ -0.1875,
709
+ -0.3125,
710
+ -0.1875,
711
+ -0.3125,
712
+ -0.0625,
713
+ -0.375,
714
+ -0.3125,
715
+ -0.1875,
716
+ -0.375,
717
+ -0.375,
718
+ -0.125,
719
+ -0.4375,
720
+ -0.25,
721
+ -0.25,
722
+ -0.25,
723
+ -0.1875
724
+ ],
725
+ "q99": [
726
+ 0.125,
727
+ 0.1875,
728
+ 0.0625,
729
+ 0.0,
730
+ 0.125,
731
+ 0.3125,
732
+ 0.375,
733
+ 0.3125,
734
+ 0.3125,
735
+ 0.25,
736
+ 0.1875,
737
+ 0.1875,
738
+ 0.1875,
739
+ 0.0625,
740
+ 0.0,
741
+ 0.1875,
742
+ 0.3125,
743
+ 0.125,
744
+ 0.1875,
745
+ 0.1875,
746
+ 0.25,
747
+ 0.0,
748
+ 0.1875,
749
+ 0.0625,
750
+ 0.25,
751
+ 0.3125,
752
+ 0.1875,
753
+ 0.25,
754
+ 0.25,
755
+ 0.3125,
756
+ 0.3125,
757
+ 0.3125,
758
+ 0.375,
759
+ 0.25,
760
+ 0.125,
761
+ 0.3125,
762
+ 0.25,
763
+ 0.125,
764
+ 0.3125,
765
+ 0.125,
766
+ 0.125,
767
+ 0.375,
768
+ 0.1875,
769
+ 0.125,
770
+ 0.25,
771
+ 0.125,
772
+ 0.1875,
773
+ 0.0625,
774
+ 0.25,
775
+ 0.1875,
776
+ 0.25,
777
+ 0.25,
778
+ 0.375,
779
+ 0.1875,
780
+ 0.25,
781
+ 0.3125,
782
+ 0.375,
783
+ 0.4375,
784
+ 0.25,
785
+ 0.375,
786
+ 0.375,
787
+ 0.25,
788
+ 0.25,
789
+ 0.25
790
+ ]
791
+ },
792
+ "left_hand_joints": {
793
+ "min": [
794
+ 0.0,
795
+ 0.0,
796
+ 0.0,
797
+ 0.0,
798
+ 0.0,
799
+ 0.0,
800
+ 0.0
801
+ ],
802
+ "max": [
803
+ 0.0,
804
+ 0.0,
805
+ 0.0,
806
+ 0.0,
807
+ 0.0,
808
+ 0.0,
809
+ 0.0
810
+ ],
811
+ "mean": [
812
+ 0.0,
813
+ 0.0,
814
+ 0.0,
815
+ 0.0,
816
+ 0.0,
817
+ 0.0,
818
+ 0.0
819
+ ],
820
+ "std": [
821
+ 0.0,
822
+ 0.0,
823
+ 0.0,
824
+ 0.0,
825
+ 0.0,
826
+ 0.0,
827
+ 0.0
828
+ ],
829
+ "q01": [
830
+ 0.0,
831
+ 0.0,
832
+ 0.0,
833
+ 0.0,
834
+ 0.0,
835
+ 0.0,
836
+ 0.0
837
+ ],
838
+ "q99": [
839
+ 0.0,
840
+ 0.0,
841
+ 0.0,
842
+ 0.0,
843
+ 0.0,
844
+ 0.0,
845
+ 0.0
846
+ ]
847
+ },
848
+ "right_hand_joints": {
849
+ "min": [
850
+ 0.0,
851
+ 0.0,
852
+ 0.0,
853
+ 0.0,
854
+ 0.0,
855
+ 0.0,
856
+ 0.0
857
+ ],
858
+ "max": [
859
+ 0.0,
860
+ 0.0,
861
+ 0.0,
862
+ 0.0,
863
+ 0.0,
864
+ 0.0,
865
+ 0.0
866
+ ],
867
+ "mean": [
868
+ 0.0,
869
+ 0.0,
870
+ 0.0,
871
+ 0.0,
872
+ 0.0,
873
+ 0.0,
874
+ 0.0
875
+ ],
876
+ "std": [
877
+ 0.0,
878
+ 0.0,
879
+ 0.0,
880
+ 0.0,
881
+ 0.0,
882
+ 0.0,
883
+ 0.0
884
+ ],
885
+ "q01": [
886
+ 0.0,
887
+ 0.0,
888
+ 0.0,
889
+ 0.0,
890
+ 0.0,
891
+ 0.0,
892
+ 0.0
893
+ ],
894
+ "q99": [
895
+ 0.0,
896
+ 0.0,
897
+ 0.0,
898
+ 0.0,
899
+ 0.0,
900
+ 0.0,
901
+ 0.0
902
+ ]
903
+ }
904
+ },
905
+ "relative_action": {}
906
+ }
907
+ }
checkpoint-1000/experiment_cfg/final_model_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "Gr00tN1d7",
3
+ "model_dtype": "bfloat16",
4
+ "model_name": "nvidia/Cosmos-Reason2-2B",
5
+ "backbone_model_type": "qwen",
6
+ "model_revision": null,
7
+ "tune_top_llm_layers": 0,
8
+ "backbone_embedding_dim": 2048,
9
+ "tune_llm": false,
10
+ "tune_visual": false,
11
+ "select_layer": 16,
12
+ "reproject_vision": false,
13
+ "use_flash_attention": true,
14
+ "load_bf16": false,
15
+ "backbone_trainable_params_fp32": true,
16
+ "extra_augmentation_config": null,
17
+ "apply_sincos_state_encoding": false,
18
+ "use_percentiles": true,
19
+ "use_relative_action": false,
20
+ "max_state_dim": 132,
21
+ "max_action_dim": 132,
22
+ "action_horizon": 40,
23
+ "hidden_size": 1024,
24
+ "input_embedding_dim": 1536,
25
+ "state_history_length": 1,
26
+ "add_pos_embed": true,
27
+ "attn_dropout": 0.2,
28
+ "use_vlln": true,
29
+ "max_seq_len": 1024,
30
+ "use_alternate_vl_dit": true,
31
+ "attend_text_every_n_blocks": 2,
32
+ "diffusion_model_cfg": {
33
+ "attention_head_dim": 48,
34
+ "dropout": 0.2,
35
+ "final_dropout": true,
36
+ "interleave_self_attention": true,
37
+ "norm_type": "ada_norm",
38
+ "num_attention_heads": 32,
39
+ "num_layers": 32,
40
+ "output_dim": 1024,
41
+ "positional_embeddings": null
42
+ },
43
+ "num_inference_timesteps": 4,
44
+ "noise_beta_alpha": 1.5,
45
+ "noise_beta_beta": 1.0,
46
+ "noise_s": 0.999,
47
+ "num_timestep_buckets": 1000,
48
+ "tune_projector": true,
49
+ "tune_diffusion_model": true,
50
+ "tune_vlln": true,
51
+ "state_dropout_prob": 0.2,
52
+ "exclude_state": false,
53
+ "use_mean_std": false,
54
+ "max_num_embodiments": 32
55
+ }
checkpoint-1000/experiment_cfg/final_processor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbcaea5ee88f1e0f1465043920a2647c67e7de17d24adfd1c477742a6168edec
3
+ size 4986649584
checkpoint-1000/model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85bcef826b7c7611f2614164b09d887149b63b19b08f8023870f5cbc6fb6cf27
3
+ size 4970792616
checkpoint-1000/model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24c250b4f8e6f7ebeb3ecb7ed40a2de4da7784c132a939f3d79c1812332ff90e
3
+ size 2618758696
checkpoint-1000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb18022bdf1d7ba52357186f60baecfec0fb063ece267ca5f7dc5daeab48ea9f
3
+ size 12964594710
checkpoint-1000/processor_config.json ADDED
@@ -0,0 +1,1159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d7Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "real_g1_relative_eef_relative_joints": {
6
+ "video": {
7
+ "delta_indices": [
8
+ -20,
9
+ 0
10
+ ],
11
+ "modality_keys": [
12
+ "ego_view"
13
+ ],
14
+ "sin_cos_embedding_keys": null,
15
+ "mean_std_embedding_keys": null,
16
+ "action_configs": null
17
+ },
18
+ "state": {
19
+ "delta_indices": [
20
+ 0
21
+ ],
22
+ "modality_keys": [
23
+ "left_wrist_eef_9d",
24
+ "right_wrist_eef_9d",
25
+ "left_hand",
26
+ "right_hand",
27
+ "left_arm",
28
+ "right_arm",
29
+ "waist"
30
+ ],
31
+ "sin_cos_embedding_keys": null,
32
+ "mean_std_embedding_keys": null,
33
+ "action_configs": null
34
+ },
35
+ "action": {
36
+ "delta_indices": [
37
+ 0,
38
+ 1,
39
+ 2,
40
+ 3,
41
+ 4,
42
+ 5,
43
+ 6,
44
+ 7,
45
+ 8,
46
+ 9,
47
+ 10,
48
+ 11,
49
+ 12,
50
+ 13,
51
+ 14,
52
+ 15,
53
+ 16,
54
+ 17,
55
+ 18,
56
+ 19,
57
+ 20,
58
+ 21,
59
+ 22,
60
+ 23,
61
+ 24,
62
+ 25,
63
+ 26,
64
+ 27,
65
+ 28,
66
+ 29,
67
+ 30,
68
+ 31,
69
+ 32,
70
+ 33,
71
+ 34,
72
+ 35,
73
+ 36,
74
+ 37,
75
+ 38,
76
+ 39
77
+ ],
78
+ "modality_keys": [
79
+ "left_wrist_eef_9d",
80
+ "right_wrist_eef_9d",
81
+ "left_hand",
82
+ "right_hand",
83
+ "left_arm",
84
+ "right_arm",
85
+ "waist",
86
+ "base_height_command",
87
+ "navigate_command"
88
+ ],
89
+ "sin_cos_embedding_keys": null,
90
+ "mean_std_embedding_keys": null,
91
+ "action_configs": [
92
+ {
93
+ "rep": "RELATIVE",
94
+ "type": "EEF",
95
+ "format": "XYZ_ROT6D",
96
+ "state_key": "left_wrist_eef_9d"
97
+ },
98
+ {
99
+ "rep": "RELATIVE",
100
+ "type": "EEF",
101
+ "format": "XYZ_ROT6D",
102
+ "state_key": "right_wrist_eef_9d"
103
+ },
104
+ {
105
+ "rep": "ABSOLUTE",
106
+ "type": "NON_EEF",
107
+ "format": "DEFAULT",
108
+ "state_key": "left_hand"
109
+ },
110
+ {
111
+ "rep": "ABSOLUTE",
112
+ "type": "NON_EEF",
113
+ "format": "DEFAULT",
114
+ "state_key": "right_hand"
115
+ },
116
+ {
117
+ "rep": "RELATIVE",
118
+ "type": "NON_EEF",
119
+ "format": "DEFAULT",
120
+ "state_key": "left_arm"
121
+ },
122
+ {
123
+ "rep": "RELATIVE",
124
+ "type": "NON_EEF",
125
+ "format": "DEFAULT",
126
+ "state_key": "right_arm"
127
+ },
128
+ {
129
+ "rep": "ABSOLUTE",
130
+ "type": "NON_EEF",
131
+ "format": "DEFAULT",
132
+ "state_key": "waist"
133
+ },
134
+ {
135
+ "rep": "ABSOLUTE",
136
+ "type": "NON_EEF",
137
+ "format": "DEFAULT",
138
+ "state_key": "base_height_command"
139
+ },
140
+ {
141
+ "rep": "ABSOLUTE",
142
+ "type": "NON_EEF",
143
+ "format": "DEFAULT",
144
+ "state_key": "navigate_command"
145
+ }
146
+ ]
147
+ },
148
+ "language": {
149
+ "delta_indices": [
150
+ 0
151
+ ],
152
+ "modality_keys": [
153
+ "annotation.human.task_description"
154
+ ],
155
+ "sin_cos_embedding_keys": null,
156
+ "mean_std_embedding_keys": null,
157
+ "action_configs": null
158
+ }
159
+ },
160
+ "real_r1_pro_sharpa_relative_eef_mecka": {
161
+ "video": {
162
+ "delta_indices": [
163
+ -30,
164
+ 0
165
+ ],
166
+ "modality_keys": [
167
+ "ego_view_cropratio_res320x240_freq30"
168
+ ],
169
+ "sin_cos_embedding_keys": null,
170
+ "mean_std_embedding_keys": null,
171
+ "action_configs": null
172
+ },
173
+ "state": {
174
+ "delta_indices": [
175
+ 0
176
+ ],
177
+ "modality_keys": [
178
+ "left_wrist_eef",
179
+ "right_wrist_eef",
180
+ "left_hand_joints",
181
+ "right_hand_joints"
182
+ ],
183
+ "sin_cos_embedding_keys": null,
184
+ "mean_std_embedding_keys": null,
185
+ "action_configs": null
186
+ },
187
+ "action": {
188
+ "delta_indices": [
189
+ 0,
190
+ 1,
191
+ 2,
192
+ 3,
193
+ 4,
194
+ 5,
195
+ 6,
196
+ 7,
197
+ 8,
198
+ 9,
199
+ 10,
200
+ 11,
201
+ 12,
202
+ 13,
203
+ 14,
204
+ 15,
205
+ 16,
206
+ 17,
207
+ 18,
208
+ 19,
209
+ 20,
210
+ 21,
211
+ 22,
212
+ 23,
213
+ 24,
214
+ 25,
215
+ 26,
216
+ 27,
217
+ 28,
218
+ 29,
219
+ 30,
220
+ 31,
221
+ 32,
222
+ 33,
223
+ 34,
224
+ 35,
225
+ 36,
226
+ 37,
227
+ 38,
228
+ 39
229
+ ],
230
+ "modality_keys": [
231
+ "left_wrist_eef",
232
+ "right_wrist_eef",
233
+ "left_hand_joints",
234
+ "right_hand_joints"
235
+ ],
236
+ "sin_cos_embedding_keys": null,
237
+ "mean_std_embedding_keys": null,
238
+ "action_configs": [
239
+ {
240
+ "rep": "RELATIVE",
241
+ "type": "EEF",
242
+ "format": "XYZ_ROT6D",
243
+ "state_key": "left_wrist_eef"
244
+ },
245
+ {
246
+ "rep": "RELATIVE",
247
+ "type": "EEF",
248
+ "format": "XYZ_ROT6D",
249
+ "state_key": "right_wrist_eef"
250
+ },
251
+ {
252
+ "rep": "ABSOLUTE",
253
+ "type": "NON_EEF",
254
+ "format": "DEFAULT",
255
+ "state_key": "left_hand_joints"
256
+ },
257
+ {
258
+ "rep": "ABSOLUTE",
259
+ "type": "NON_EEF",
260
+ "format": "DEFAULT",
261
+ "state_key": "right_hand_joints"
262
+ }
263
+ ]
264
+ },
265
+ "language": {
266
+ "delta_indices": [
267
+ 0
268
+ ],
269
+ "modality_keys": [
270
+ "annotation.human.coarse_action"
271
+ ],
272
+ "sin_cos_embedding_keys": null,
273
+ "mean_std_embedding_keys": null,
274
+ "action_configs": null
275
+ }
276
+ },
277
+ "real_r1_pro_sharpa_relative_eef_human": {
278
+ "video": {
279
+ "delta_indices": [
280
+ -20,
281
+ 0
282
+ ],
283
+ "modality_keys": [
284
+ "ego_view_res320x240_freq20",
285
+ "left_wrist_view_res320x240_freq20",
286
+ "right_wrist_view_res320x240_freq20"
287
+ ],
288
+ "sin_cos_embedding_keys": null,
289
+ "mean_std_embedding_keys": null,
290
+ "action_configs": null
291
+ },
292
+ "state": {
293
+ "delta_indices": [
294
+ 0
295
+ ],
296
+ "modality_keys": [
297
+ "left_wrist_eef",
298
+ "right_wrist_eef",
299
+ "left_hand_joints",
300
+ "right_hand_joints"
301
+ ],
302
+ "sin_cos_embedding_keys": null,
303
+ "mean_std_embedding_keys": null,
304
+ "action_configs": null
305
+ },
306
+ "action": {
307
+ "delta_indices": [
308
+ 0,
309
+ 1,
310
+ 2,
311
+ 3,
312
+ 4,
313
+ 5,
314
+ 6,
315
+ 7,
316
+ 8,
317
+ 9,
318
+ 10,
319
+ 11,
320
+ 12,
321
+ 13,
322
+ 14,
323
+ 15,
324
+ 16,
325
+ 17,
326
+ 18,
327
+ 19,
328
+ 20,
329
+ 21,
330
+ 22,
331
+ 23,
332
+ 24,
333
+ 25,
334
+ 26,
335
+ 27,
336
+ 28,
337
+ 29,
338
+ 30,
339
+ 31,
340
+ 32,
341
+ 33,
342
+ 34,
343
+ 35,
344
+ 36,
345
+ 37,
346
+ 38,
347
+ 39
348
+ ],
349
+ "modality_keys": [
350
+ "left_wrist_eef",
351
+ "right_wrist_eef",
352
+ "left_hand_joints",
353
+ "right_hand_joints"
354
+ ],
355
+ "sin_cos_embedding_keys": null,
356
+ "mean_std_embedding_keys": null,
357
+ "action_configs": [
358
+ {
359
+ "rep": "RELATIVE",
360
+ "type": "EEF",
361
+ "format": "XYZ_ROT6D",
362
+ "state_key": "left_wrist_eef"
363
+ },
364
+ {
365
+ "rep": "RELATIVE",
366
+ "type": "EEF",
367
+ "format": "XYZ_ROT6D",
368
+ "state_key": "right_wrist_eef"
369
+ },
370
+ {
371
+ "rep": "ABSOLUTE",
372
+ "type": "NON_EEF",
373
+ "format": "DEFAULT",
374
+ "state_key": "left_hand_joints"
375
+ },
376
+ {
377
+ "rep": "ABSOLUTE",
378
+ "type": "NON_EEF",
379
+ "format": "DEFAULT",
380
+ "state_key": "right_hand_joints"
381
+ }
382
+ ]
383
+ },
384
+ "language": {
385
+ "delta_indices": [
386
+ 0
387
+ ],
388
+ "modality_keys": [
389
+ "annotation.human.coarse_action"
390
+ ],
391
+ "sin_cos_embedding_keys": null,
392
+ "mean_std_embedding_keys": null,
393
+ "action_configs": null
394
+ }
395
+ },
396
+ "real_r1_pro_sharpa_relative_eef": {
397
+ "video": {
398
+ "delta_indices": [
399
+ -20,
400
+ 0
401
+ ],
402
+ "modality_keys": [
403
+ "ego_view_res320x240_freq20",
404
+ "left_wrist_view_res320x240_freq20",
405
+ "right_wrist_view_res320x240_freq20"
406
+ ],
407
+ "sin_cos_embedding_keys": null,
408
+ "mean_std_embedding_keys": null,
409
+ "action_configs": null
410
+ },
411
+ "state": {
412
+ "delta_indices": [
413
+ 0
414
+ ],
415
+ "modality_keys": [
416
+ "left_wrist_eef",
417
+ "right_wrist_eef",
418
+ "left_hand_joints",
419
+ "right_hand_joints"
420
+ ],
421
+ "sin_cos_embedding_keys": null,
422
+ "mean_std_embedding_keys": null,
423
+ "action_configs": null
424
+ },
425
+ "action": {
426
+ "delta_indices": [
427
+ 0,
428
+ 1,
429
+ 2,
430
+ 3,
431
+ 4,
432
+ 5,
433
+ 6,
434
+ 7,
435
+ 8,
436
+ 9,
437
+ 10,
438
+ 11,
439
+ 12,
440
+ 13,
441
+ 14,
442
+ 15,
443
+ 16,
444
+ 17,
445
+ 18,
446
+ 19,
447
+ 20,
448
+ 21,
449
+ 22,
450
+ 23,
451
+ 24,
452
+ 25,
453
+ 26,
454
+ 27,
455
+ 28,
456
+ 29,
457
+ 30,
458
+ 31,
459
+ 32,
460
+ 33,
461
+ 34,
462
+ 35,
463
+ 36,
464
+ 37,
465
+ 38,
466
+ 39
467
+ ],
468
+ "modality_keys": [
469
+ "left_wrist_eef",
470
+ "right_wrist_eef",
471
+ "left_hand_joints",
472
+ "right_hand_joints"
473
+ ],
474
+ "sin_cos_embedding_keys": null,
475
+ "mean_std_embedding_keys": null,
476
+ "action_configs": [
477
+ {
478
+ "rep": "RELATIVE",
479
+ "type": "EEF",
480
+ "format": "XYZ_ROT6D",
481
+ "state_key": "left_wrist_eef"
482
+ },
483
+ {
484
+ "rep": "RELATIVE",
485
+ "type": "EEF",
486
+ "format": "XYZ_ROT6D",
487
+ "state_key": "right_wrist_eef"
488
+ },
489
+ {
490
+ "rep": "ABSOLUTE",
491
+ "type": "NON_EEF",
492
+ "format": "DEFAULT",
493
+ "state_key": "left_hand_joints"
494
+ },
495
+ {
496
+ "rep": "ABSOLUTE",
497
+ "type": "NON_EEF",
498
+ "format": "DEFAULT",
499
+ "state_key": "right_hand_joints"
500
+ }
501
+ ]
502
+ },
503
+ "language": {
504
+ "delta_indices": [
505
+ 0
506
+ ],
507
+ "modality_keys": [
508
+ "annotation.human.coarse_action"
509
+ ],
510
+ "sin_cos_embedding_keys": null,
511
+ "mean_std_embedding_keys": null,
512
+ "action_configs": null
513
+ }
514
+ },
515
+ "xdof_relative_eef_relative_joint": {
516
+ "video": {
517
+ "delta_indices": [
518
+ -30,
519
+ 0
520
+ ],
521
+ "modality_keys": [
522
+ "top_camera-images-rgb_320_240",
523
+ "left_camera-images-rgb_320_240",
524
+ "right_camera-images-rgb_320_240"
525
+ ],
526
+ "sin_cos_embedding_keys": null,
527
+ "mean_std_embedding_keys": null,
528
+ "action_configs": null
529
+ },
530
+ "state": {
531
+ "delta_indices": [
532
+ 0
533
+ ],
534
+ "modality_keys": [
535
+ "left_wrist_eef",
536
+ "right_wrist_eef",
537
+ "left_gripper_pos",
538
+ "right_gripper_pos",
539
+ "left_joint_pos",
540
+ "right_joint_pos"
541
+ ],
542
+ "sin_cos_embedding_keys": null,
543
+ "mean_std_embedding_keys": null,
544
+ "action_configs": null
545
+ },
546
+ "action": {
547
+ "delta_indices": [
548
+ 0,
549
+ 1,
550
+ 2,
551
+ 3,
552
+ 4,
553
+ 5,
554
+ 6,
555
+ 7,
556
+ 8,
557
+ 9,
558
+ 10,
559
+ 11,
560
+ 12,
561
+ 13,
562
+ 14,
563
+ 15,
564
+ 16,
565
+ 17,
566
+ 18,
567
+ 19,
568
+ 20,
569
+ 21,
570
+ 22,
571
+ 23,
572
+ 24,
573
+ 25,
574
+ 26,
575
+ 27,
576
+ 28,
577
+ 29,
578
+ 30,
579
+ 31,
580
+ 32,
581
+ 33,
582
+ 34,
583
+ 35,
584
+ 36,
585
+ 37,
586
+ 38,
587
+ 39
588
+ ],
589
+ "modality_keys": [
590
+ "left_wrist_eef",
591
+ "right_wrist_eef",
592
+ "left_gripper_pos",
593
+ "right_gripper_pos",
594
+ "left_joint_pos",
595
+ "right_joint_pos"
596
+ ],
597
+ "sin_cos_embedding_keys": null,
598
+ "mean_std_embedding_keys": null,
599
+ "action_configs": [
600
+ {
601
+ "rep": "RELATIVE",
602
+ "type": "EEF",
603
+ "format": "XYZ_ROT6D",
604
+ "state_key": "left_wrist_eef"
605
+ },
606
+ {
607
+ "rep": "RELATIVE",
608
+ "type": "EEF",
609
+ "format": "XYZ_ROT6D",
610
+ "state_key": "right_wrist_eef"
611
+ },
612
+ {
613
+ "rep": "ABSOLUTE",
614
+ "type": "NON_EEF",
615
+ "format": "DEFAULT",
616
+ "state_key": "left_gripper_pos"
617
+ },
618
+ {
619
+ "rep": "ABSOLUTE",
620
+ "type": "NON_EEF",
621
+ "format": "DEFAULT",
622
+ "state_key": "right_gripper_pos"
623
+ },
624
+ {
625
+ "rep": "RELATIVE",
626
+ "type": "NON_EEF",
627
+ "format": "DEFAULT",
628
+ "state_key": "left_joint_pos"
629
+ },
630
+ {
631
+ "rep": "RELATIVE",
632
+ "type": "NON_EEF",
633
+ "format": "DEFAULT",
634
+ "state_key": "right_joint_pos"
635
+ }
636
+ ]
637
+ },
638
+ "language": {
639
+ "delta_indices": [
640
+ 0
641
+ ],
642
+ "modality_keys": [
643
+ "annotation.task"
644
+ ],
645
+ "sin_cos_embedding_keys": null,
646
+ "mean_std_embedding_keys": null,
647
+ "action_configs": null
648
+ }
649
+ },
650
+ "real_r1_pro_sharpa_relative_eef_maxinsights": {
651
+ "video": {
652
+ "delta_indices": [
653
+ -30,
654
+ 0
655
+ ],
656
+ "modality_keys": [
657
+ "ego_view_cropratio_res320x240_freq30"
658
+ ],
659
+ "sin_cos_embedding_keys": null,
660
+ "mean_std_embedding_keys": null,
661
+ "action_configs": null
662
+ },
663
+ "state": {
664
+ "delta_indices": [
665
+ 0
666
+ ],
667
+ "modality_keys": [
668
+ "left_wrist_eef",
669
+ "right_wrist_eef",
670
+ "left_hand_joints",
671
+ "right_hand_joints"
672
+ ],
673
+ "sin_cos_embedding_keys": null,
674
+ "mean_std_embedding_keys": null,
675
+ "action_configs": null
676
+ },
677
+ "action": {
678
+ "delta_indices": [
679
+ 0,
680
+ 1,
681
+ 2,
682
+ 3,
683
+ 4,
684
+ 5,
685
+ 6,
686
+ 7,
687
+ 8,
688
+ 9,
689
+ 10,
690
+ 11,
691
+ 12,
692
+ 13,
693
+ 14,
694
+ 15,
695
+ 16,
696
+ 17,
697
+ 18,
698
+ 19,
699
+ 20,
700
+ 21,
701
+ 22,
702
+ 23,
703
+ 24,
704
+ 25,
705
+ 26,
706
+ 27,
707
+ 28,
708
+ 29,
709
+ 30,
710
+ 31,
711
+ 32,
712
+ 33,
713
+ 34,
714
+ 35,
715
+ 36,
716
+ 37,
717
+ 38,
718
+ 39
719
+ ],
720
+ "modality_keys": [
721
+ "left_wrist_eef",
722
+ "right_wrist_eef",
723
+ "left_hand_joints",
724
+ "right_hand_joints"
725
+ ],
726
+ "sin_cos_embedding_keys": null,
727
+ "mean_std_embedding_keys": null,
728
+ "action_configs": [
729
+ {
730
+ "rep": "RELATIVE",
731
+ "type": "EEF",
732
+ "format": "XYZ_ROT6D",
733
+ "state_key": "left_wrist_eef"
734
+ },
735
+ {
736
+ "rep": "RELATIVE",
737
+ "type": "EEF",
738
+ "format": "XYZ_ROT6D",
739
+ "state_key": "right_wrist_eef"
740
+ },
741
+ {
742
+ "rep": "ABSOLUTE",
743
+ "type": "NON_EEF",
744
+ "format": "DEFAULT",
745
+ "state_key": "left_hand_joints"
746
+ },
747
+ {
748
+ "rep": "ABSOLUTE",
749
+ "type": "NON_EEF",
750
+ "format": "DEFAULT",
751
+ "state_key": "right_hand_joints"
752
+ }
753
+ ]
754
+ },
755
+ "language": {
756
+ "delta_indices": [
757
+ 0
758
+ ],
759
+ "modality_keys": [
760
+ "annotation.human.coarse_action"
761
+ ],
762
+ "sin_cos_embedding_keys": null,
763
+ "mean_std_embedding_keys": null,
764
+ "action_configs": null
765
+ }
766
+ },
767
+ "xdof_relative_eef_relative_joint_subtask": {
768
+ "video": {
769
+ "delta_indices": [
770
+ -30,
771
+ 0
772
+ ],
773
+ "modality_keys": [
774
+ "top_camera-images-rgb_320_240",
775
+ "left_camera-images-rgb_320_240",
776
+ "right_camera-images-rgb_320_240"
777
+ ],
778
+ "sin_cos_embedding_keys": null,
779
+ "mean_std_embedding_keys": null,
780
+ "action_configs": null
781
+ },
782
+ "state": {
783
+ "delta_indices": [
784
+ 0
785
+ ],
786
+ "modality_keys": [
787
+ "left_wrist_eef",
788
+ "right_wrist_eef",
789
+ "left_gripper_pos",
790
+ "right_gripper_pos",
791
+ "left_joint_pos",
792
+ "right_joint_pos"
793
+ ],
794
+ "sin_cos_embedding_keys": null,
795
+ "mean_std_embedding_keys": null,
796
+ "action_configs": null
797
+ },
798
+ "action": {
799
+ "delta_indices": [
800
+ 0,
801
+ 1,
802
+ 2,
803
+ 3,
804
+ 4,
805
+ 5,
806
+ 6,
807
+ 7,
808
+ 8,
809
+ 9,
810
+ 10,
811
+ 11,
812
+ 12,
813
+ 13,
814
+ 14,
815
+ 15,
816
+ 16,
817
+ 17,
818
+ 18,
819
+ 19,
820
+ 20,
821
+ 21,
822
+ 22,
823
+ 23,
824
+ 24,
825
+ 25,
826
+ 26,
827
+ 27,
828
+ 28,
829
+ 29,
830
+ 30,
831
+ 31,
832
+ 32,
833
+ 33,
834
+ 34,
835
+ 35,
836
+ 36,
837
+ 37,
838
+ 38,
839
+ 39
840
+ ],
841
+ "modality_keys": [
842
+ "left_wrist_eef",
843
+ "right_wrist_eef",
844
+ "left_gripper_pos",
845
+ "right_gripper_pos",
846
+ "left_joint_pos",
847
+ "right_joint_pos"
848
+ ],
849
+ "sin_cos_embedding_keys": null,
850
+ "mean_std_embedding_keys": null,
851
+ "action_configs": [
852
+ {
853
+ "rep": "RELATIVE",
854
+ "type": "EEF",
855
+ "format": "XYZ_ROT6D",
856
+ "state_key": "left_wrist_eef"
857
+ },
858
+ {
859
+ "rep": "RELATIVE",
860
+ "type": "EEF",
861
+ "format": "XYZ_ROT6D",
862
+ "state_key": "right_wrist_eef"
863
+ },
864
+ {
865
+ "rep": "ABSOLUTE",
866
+ "type": "NON_EEF",
867
+ "format": "DEFAULT",
868
+ "state_key": "left_gripper_pos"
869
+ },
870
+ {
871
+ "rep": "ABSOLUTE",
872
+ "type": "NON_EEF",
873
+ "format": "DEFAULT",
874
+ "state_key": "right_gripper_pos"
875
+ },
876
+ {
877
+ "rep": "RELATIVE",
878
+ "type": "NON_EEF",
879
+ "format": "DEFAULT",
880
+ "state_key": "left_joint_pos"
881
+ },
882
+ {
883
+ "rep": "RELATIVE",
884
+ "type": "NON_EEF",
885
+ "format": "DEFAULT",
886
+ "state_key": "right_joint_pos"
887
+ }
888
+ ]
889
+ },
890
+ "language": {
891
+ "delta_indices": [
892
+ 0
893
+ ],
894
+ "modality_keys": [
895
+ "annotation.sub_task"
896
+ ],
897
+ "sin_cos_embedding_keys": null,
898
+ "mean_std_embedding_keys": null,
899
+ "action_configs": null
900
+ }
901
+ },
902
+ "oxe_droid_relative_eef_relative_joint": {
903
+ "video": {
904
+ "delta_indices": [
905
+ -15,
906
+ 0
907
+ ],
908
+ "modality_keys": [
909
+ "exterior_image_1_left",
910
+ "wrist_image_left"
911
+ ],
912
+ "sin_cos_embedding_keys": null,
913
+ "mean_std_embedding_keys": null,
914
+ "action_configs": null
915
+ },
916
+ "state": {
917
+ "delta_indices": [
918
+ 0
919
+ ],
920
+ "modality_keys": [
921
+ "eef_9d",
922
+ "gripper_position",
923
+ "joint_position"
924
+ ],
925
+ "sin_cos_embedding_keys": null,
926
+ "mean_std_embedding_keys": null,
927
+ "action_configs": null
928
+ },
929
+ "action": {
930
+ "delta_indices": [
931
+ 0,
932
+ 1,
933
+ 2,
934
+ 3,
935
+ 4,
936
+ 5,
937
+ 6,
938
+ 7,
939
+ 8,
940
+ 9,
941
+ 10,
942
+ 11,
943
+ 12,
944
+ 13,
945
+ 14,
946
+ 15,
947
+ 16,
948
+ 17,
949
+ 18,
950
+ 19,
951
+ 20,
952
+ 21,
953
+ 22,
954
+ 23,
955
+ 24,
956
+ 25,
957
+ 26,
958
+ 27,
959
+ 28,
960
+ 29,
961
+ 30,
962
+ 31,
963
+ 32,
964
+ 33,
965
+ 34,
966
+ 35,
967
+ 36,
968
+ 37,
969
+ 38,
970
+ 39
971
+ ],
972
+ "modality_keys": [
973
+ "eef_9d",
974
+ "gripper_position",
975
+ "joint_position"
976
+ ],
977
+ "sin_cos_embedding_keys": null,
978
+ "mean_std_embedding_keys": null,
979
+ "action_configs": [
980
+ {
981
+ "rep": "RELATIVE",
982
+ "type": "EEF",
983
+ "format": "XYZ_ROT6D",
984
+ "state_key": "eef_9d"
985
+ },
986
+ {
987
+ "rep": "ABSOLUTE",
988
+ "type": "NON_EEF",
989
+ "format": "DEFAULT",
990
+ "state_key": "gripper_position"
991
+ },
992
+ {
993
+ "rep": "RELATIVE",
994
+ "type": "NON_EEF",
995
+ "format": "DEFAULT",
996
+ "state_key": "joint_position"
997
+ }
998
+ ]
999
+ },
1000
+ "language": {
1001
+ "delta_indices": [
1002
+ 0
1003
+ ],
1004
+ "modality_keys": [
1005
+ "annotation.language.language_instruction"
1006
+ ],
1007
+ "sin_cos_embedding_keys": null,
1008
+ "mean_std_embedding_keys": null,
1009
+ "action_configs": null
1010
+ }
1011
+ },
1012
+ "unitree_g1_sonic": {
1013
+ "video": {
1014
+ "delta_indices": [
1015
+ 0
1016
+ ],
1017
+ "modality_keys": [
1018
+ "ego_view"
1019
+ ],
1020
+ "sin_cos_embedding_keys": null,
1021
+ "mean_std_embedding_keys": null,
1022
+ "action_configs": null
1023
+ },
1024
+ "state": {
1025
+ "delta_indices": [
1026
+ 0
1027
+ ],
1028
+ "modality_keys": [
1029
+ "left_leg",
1030
+ "right_leg",
1031
+ "waist",
1032
+ "left_arm",
1033
+ "right_arm",
1034
+ "left_hand",
1035
+ "right_hand",
1036
+ "projected_gravity"
1037
+ ],
1038
+ "sin_cos_embedding_keys": null,
1039
+ "mean_std_embedding_keys": null,
1040
+ "action_configs": null
1041
+ },
1042
+ "action": {
1043
+ "delta_indices": [
1044
+ 0,
1045
+ 1,
1046
+ 2,
1047
+ 3,
1048
+ 4,
1049
+ 5,
1050
+ 6,
1051
+ 7,
1052
+ 8,
1053
+ 9,
1054
+ 10,
1055
+ 11,
1056
+ 12,
1057
+ 13,
1058
+ 14,
1059
+ 15,
1060
+ 16,
1061
+ 17,
1062
+ 18,
1063
+ 19,
1064
+ 20,
1065
+ 21,
1066
+ 22,
1067
+ 23,
1068
+ 24,
1069
+ 25,
1070
+ 26,
1071
+ 27,
1072
+ 28,
1073
+ 29,
1074
+ 30,
1075
+ 31,
1076
+ 32,
1077
+ 33,
1078
+ 34,
1079
+ 35,
1080
+ 36,
1081
+ 37,
1082
+ 38,
1083
+ 39
1084
+ ],
1085
+ "modality_keys": [
1086
+ "motion_token",
1087
+ "left_hand_joints",
1088
+ "right_hand_joints"
1089
+ ],
1090
+ "sin_cos_embedding_keys": null,
1091
+ "mean_std_embedding_keys": null,
1092
+ "action_configs": [
1093
+ {
1094
+ "rep": "ABSOLUTE",
1095
+ "type": "NON_EEF",
1096
+ "format": "DEFAULT",
1097
+ "state_key": null
1098
+ },
1099
+ {
1100
+ "rep": "ABSOLUTE",
1101
+ "type": "NON_EEF",
1102
+ "format": "DEFAULT",
1103
+ "state_key": null
1104
+ },
1105
+ {
1106
+ "rep": "ABSOLUTE",
1107
+ "type": "NON_EEF",
1108
+ "format": "DEFAULT",
1109
+ "state_key": null
1110
+ }
1111
+ ]
1112
+ },
1113
+ "language": {
1114
+ "delta_indices": [
1115
+ 0
1116
+ ],
1117
+ "modality_keys": [
1118
+ "annotation.human.task_description"
1119
+ ],
1120
+ "sin_cos_embedding_keys": null,
1121
+ "mean_std_embedding_keys": null,
1122
+ "action_configs": null
1123
+ }
1124
+ }
1125
+ },
1126
+ "image_crop_size": [
1127
+ 230,
1128
+ 230
1129
+ ],
1130
+ "image_target_size": [
1131
+ 256,
1132
+ 256
1133
+ ],
1134
+ "use_albumentations": true,
1135
+ "random_rotation_angle": 0,
1136
+ "color_jitter_params": {
1137
+ "brightness": 0.3,
1138
+ "contrast": 0.4,
1139
+ "saturation": 0.5,
1140
+ "hue": 0.08
1141
+ },
1142
+ "shortest_image_edge": 256,
1143
+ "crop_fraction": 0.95,
1144
+ "letter_box_transform": false,
1145
+ "model_name": "nvidia/Cosmos-Reason2-2B",
1146
+ "model_type": "qwen",
1147
+ "formalize_language": true,
1148
+ "max_state_dim": 132,
1149
+ "max_action_dim": 132,
1150
+ "max_action_horizon": 40,
1151
+ "use_percentiles": true,
1152
+ "use_mean_std": false,
1153
+ "clip_outliers": true,
1154
+ "apply_sincos_state_encoding": false,
1155
+ "use_relative_action": true,
1156
+ "exclude_state": false,
1157
+ "state_dropout_prob": 0.2
1158
+ }
1159
+ }
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0c289132373457b8c2fe1131b487a86b6e2f3d29d06076f0519a266bd350710
3
+ size 14645
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed5dcdc121951b51745034e467890f8a7b69b505c12b1a668d378806746fee59
3
+ size 1465
checkpoint-1000/statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,642 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.1,
6
+ "eval_steps": 500,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "grad_norm": 0.13945120573043823,
14
+ "learning_rate": 1.8e-06,
15
+ "loss": 1.1792,
16
+ "step": 10
17
+ },
18
+ {
19
+ "grad_norm": 0.14891666173934937,
20
+ "learning_rate": 3.8e-06,
21
+ "loss": 1.1842,
22
+ "step": 20
23
+ },
24
+ {
25
+ "grad_norm": 0.15832659602165222,
26
+ "learning_rate": 5.8e-06,
27
+ "loss": 1.1695,
28
+ "step": 30
29
+ },
30
+ {
31
+ "grad_norm": 0.16203539073467255,
32
+ "learning_rate": 7.8e-06,
33
+ "loss": 1.1497,
34
+ "step": 40
35
+ },
36
+ {
37
+ "grad_norm": 0.2627524137496948,
38
+ "learning_rate": 9.800000000000001e-06,
39
+ "loss": 1.153,
40
+ "step": 50
41
+ },
42
+ {
43
+ "grad_norm": 0.2639051675796509,
44
+ "learning_rate": 1.18e-05,
45
+ "loss": 1.1328,
46
+ "step": 60
47
+ },
48
+ {
49
+ "grad_norm": 0.2185407280921936,
50
+ "learning_rate": 1.3800000000000002e-05,
51
+ "loss": 1.1178,
52
+ "step": 70
53
+ },
54
+ {
55
+ "grad_norm": 0.34473440051078796,
56
+ "learning_rate": 1.58e-05,
57
+ "loss": 1.1078,
58
+ "step": 80
59
+ },
60
+ {
61
+ "grad_norm": 0.18536631762981415,
62
+ "learning_rate": 1.78e-05,
63
+ "loss": 1.1055,
64
+ "step": 90
65
+ },
66
+ {
67
+ "grad_norm": 0.5128417611122131,
68
+ "learning_rate": 1.9800000000000004e-05,
69
+ "loss": 1.1067,
70
+ "step": 100
71
+ },
72
+ {
73
+ "grad_norm": 0.28990018367767334,
74
+ "learning_rate": 2.18e-05,
75
+ "loss": 1.1033,
76
+ "step": 110
77
+ },
78
+ {
79
+ "grad_norm": 0.21424825489521027,
80
+ "learning_rate": 2.38e-05,
81
+ "loss": 1.101,
82
+ "step": 120
83
+ },
84
+ {
85
+ "grad_norm": 0.2597063481807709,
86
+ "learning_rate": 2.58e-05,
87
+ "loss": 1.0932,
88
+ "step": 130
89
+ },
90
+ {
91
+ "grad_norm": 0.3148020803928375,
92
+ "learning_rate": 2.7800000000000005e-05,
93
+ "loss": 1.094,
94
+ "step": 140
95
+ },
96
+ {
97
+ "grad_norm": 0.3488495349884033,
98
+ "learning_rate": 2.98e-05,
99
+ "loss": 1.0821,
100
+ "step": 150
101
+ },
102
+ {
103
+ "grad_norm": 0.3580583333969116,
104
+ "learning_rate": 3.18e-05,
105
+ "loss": 1.0609,
106
+ "step": 160
107
+ },
108
+ {
109
+ "grad_norm": 0.4472530484199524,
110
+ "learning_rate": 3.38e-05,
111
+ "loss": 1.0338,
112
+ "step": 170
113
+ },
114
+ {
115
+ "grad_norm": 0.534553050994873,
116
+ "learning_rate": 3.58e-05,
117
+ "loss": 0.9983,
118
+ "step": 180
119
+ },
120
+ {
121
+ "grad_norm": 0.6643158197402954,
122
+ "learning_rate": 3.7800000000000004e-05,
123
+ "loss": 0.9619,
124
+ "step": 190
125
+ },
126
+ {
127
+ "grad_norm": 0.7085841298103333,
128
+ "learning_rate": 3.9800000000000005e-05,
129
+ "loss": 0.9224,
130
+ "step": 200
131
+ },
132
+ {
133
+ "grad_norm": 0.7006478905677795,
134
+ "learning_rate": 4.18e-05,
135
+ "loss": 0.8822,
136
+ "step": 210
137
+ },
138
+ {
139
+ "grad_norm": 1.0977967977523804,
140
+ "learning_rate": 4.38e-05,
141
+ "loss": 0.8636,
142
+ "step": 220
143
+ },
144
+ {
145
+ "grad_norm": 0.7735133767127991,
146
+ "learning_rate": 4.58e-05,
147
+ "loss": 0.8314,
148
+ "step": 230
149
+ },
150
+ {
151
+ "grad_norm": 1.304010033607483,
152
+ "learning_rate": 4.78e-05,
153
+ "loss": 0.8028,
154
+ "step": 240
155
+ },
156
+ {
157
+ "grad_norm": 0.858792245388031,
158
+ "learning_rate": 4.9800000000000004e-05,
159
+ "loss": 0.7667,
160
+ "step": 250
161
+ },
162
+ {
163
+ "grad_norm": 0.9611242413520813,
164
+ "learning_rate": 5.1800000000000005e-05,
165
+ "loss": 0.7465,
166
+ "step": 260
167
+ },
168
+ {
169
+ "grad_norm": 0.9728292226791382,
170
+ "learning_rate": 5.380000000000001e-05,
171
+ "loss": 0.7271,
172
+ "step": 270
173
+ },
174
+ {
175
+ "grad_norm": 0.9787989854812622,
176
+ "learning_rate": 5.580000000000001e-05,
177
+ "loss": 0.7047,
178
+ "step": 280
179
+ },
180
+ {
181
+ "grad_norm": 0.9946438074111938,
182
+ "learning_rate": 5.7799999999999995e-05,
183
+ "loss": 0.6934,
184
+ "step": 290
185
+ },
186
+ {
187
+ "grad_norm": 0.9978992342948914,
188
+ "learning_rate": 5.9800000000000003e-05,
189
+ "loss": 0.6607,
190
+ "step": 300
191
+ },
192
+ {
193
+ "grad_norm": 0.9351487755775452,
194
+ "learning_rate": 6.18e-05,
195
+ "loss": 0.6528,
196
+ "step": 310
197
+ },
198
+ {
199
+ "grad_norm": 1.142807960510254,
200
+ "learning_rate": 6.38e-05,
201
+ "loss": 0.6212,
202
+ "step": 320
203
+ },
204
+ {
205
+ "grad_norm": 0.9334923028945923,
206
+ "learning_rate": 6.58e-05,
207
+ "loss": 0.6125,
208
+ "step": 330
209
+ },
210
+ {
211
+ "grad_norm": 0.8837148547172546,
212
+ "learning_rate": 6.780000000000001e-05,
213
+ "loss": 0.5951,
214
+ "step": 340
215
+ },
216
+ {
217
+ "grad_norm": 0.9366855025291443,
218
+ "learning_rate": 6.98e-05,
219
+ "loss": 0.5671,
220
+ "step": 350
221
+ },
222
+ {
223
+ "grad_norm": 0.9530870318412781,
224
+ "learning_rate": 7.18e-05,
225
+ "loss": 0.5615,
226
+ "step": 360
227
+ },
228
+ {
229
+ "grad_norm": 1.0772392749786377,
230
+ "learning_rate": 7.38e-05,
231
+ "loss": 0.5644,
232
+ "step": 370
233
+ },
234
+ {
235
+ "grad_norm": 0.9228338003158569,
236
+ "learning_rate": 7.58e-05,
237
+ "loss": 0.55,
238
+ "step": 380
239
+ },
240
+ {
241
+ "grad_norm": 1.1477776765823364,
242
+ "learning_rate": 7.780000000000001e-05,
243
+ "loss": 0.5417,
244
+ "step": 390
245
+ },
246
+ {
247
+ "grad_norm": 0.9487908482551575,
248
+ "learning_rate": 7.98e-05,
249
+ "loss": 0.5299,
250
+ "step": 400
251
+ },
252
+ {
253
+ "grad_norm": 0.8648481965065002,
254
+ "learning_rate": 8.18e-05,
255
+ "loss": 0.5245,
256
+ "step": 410
257
+ },
258
+ {
259
+ "grad_norm": 1.0358718633651733,
260
+ "learning_rate": 8.38e-05,
261
+ "loss": 0.5066,
262
+ "step": 420
263
+ },
264
+ {
265
+ "grad_norm": 0.9835023283958435,
266
+ "learning_rate": 8.58e-05,
267
+ "loss": 0.5033,
268
+ "step": 430
269
+ },
270
+ {
271
+ "grad_norm": 0.9648961424827576,
272
+ "learning_rate": 8.78e-05,
273
+ "loss": 0.4882,
274
+ "step": 440
275
+ },
276
+ {
277
+ "grad_norm": 1.1135525703430176,
278
+ "learning_rate": 8.98e-05,
279
+ "loss": 0.4808,
280
+ "step": 450
281
+ },
282
+ {
283
+ "grad_norm": 0.9057552218437195,
284
+ "learning_rate": 9.180000000000001e-05,
285
+ "loss": 0.4701,
286
+ "step": 460
287
+ },
288
+ {
289
+ "grad_norm": 1.0914181470870972,
290
+ "learning_rate": 9.38e-05,
291
+ "loss": 0.4617,
292
+ "step": 470
293
+ },
294
+ {
295
+ "grad_norm": 1.1427769660949707,
296
+ "learning_rate": 9.58e-05,
297
+ "loss": 0.4483,
298
+ "step": 480
299
+ },
300
+ {
301
+ "grad_norm": 0.9296786785125732,
302
+ "learning_rate": 9.78e-05,
303
+ "loss": 0.4222,
304
+ "step": 490
305
+ },
306
+ {
307
+ "eval/loss": 0.4169562757015228,
308
+ "step": 500
309
+ },
310
+ {
311
+ "grad_norm": 1.1534019708633423,
312
+ "learning_rate": 9.98e-05,
313
+ "loss": 0.4134,
314
+ "step": 500
315
+ },
316
+ {
317
+ "grad_norm": 1.0778305530548096,
318
+ "learning_rate": 9.9999778549206e-05,
319
+ "loss": 0.4081,
320
+ "step": 510
321
+ },
322
+ {
323
+ "grad_norm": 1.007461428642273,
324
+ "learning_rate": 9.999901304280685e-05,
325
+ "loss": 0.3938,
326
+ "step": 520
327
+ },
328
+ {
329
+ "grad_norm": 0.8525221943855286,
330
+ "learning_rate": 9.999770075521164e-05,
331
+ "loss": 0.3874,
332
+ "step": 530
333
+ },
334
+ {
335
+ "grad_norm": 1.109859585762024,
336
+ "learning_rate": 9.99958417007713e-05,
337
+ "loss": 0.387,
338
+ "step": 540
339
+ },
340
+ {
341
+ "grad_norm": 1.0042812824249268,
342
+ "learning_rate": 9.999343589981615e-05,
343
+ "loss": 0.373,
344
+ "step": 550
345
+ },
346
+ {
347
+ "grad_norm": 0.9587410092353821,
348
+ "learning_rate": 9.999048337865568e-05,
349
+ "loss": 0.3665,
350
+ "step": 560
351
+ },
352
+ {
353
+ "grad_norm": 1.1748636960983276,
354
+ "learning_rate": 9.998698416957815e-05,
355
+ "loss": 0.3461,
356
+ "step": 570
357
+ },
358
+ {
359
+ "grad_norm": 1.0844266414642334,
360
+ "learning_rate": 9.998293831085037e-05,
361
+ "loss": 0.3271,
362
+ "step": 580
363
+ },
364
+ {
365
+ "grad_norm": 0.927900493144989,
366
+ "learning_rate": 9.997834584671719e-05,
367
+ "loss": 0.3336,
368
+ "step": 590
369
+ },
370
+ {
371
+ "grad_norm": 1.1865487098693848,
372
+ "learning_rate": 9.997320682740107e-05,
373
+ "loss": 0.3229,
374
+ "step": 600
375
+ },
376
+ {
377
+ "grad_norm": 1.2733619213104248,
378
+ "learning_rate": 9.996752130910149e-05,
379
+ "loss": 0.317,
380
+ "step": 610
381
+ },
382
+ {
383
+ "grad_norm": 1.2515984773635864,
384
+ "learning_rate": 9.99612893539944e-05,
385
+ "loss": 0.3263,
386
+ "step": 620
387
+ },
388
+ {
389
+ "grad_norm": 1.2944176197052002,
390
+ "learning_rate": 9.995451103023144e-05,
391
+ "loss": 0.3099,
392
+ "step": 630
393
+ },
394
+ {
395
+ "grad_norm": 1.0592591762542725,
396
+ "learning_rate": 9.994718641193928e-05,
397
+ "loss": 0.2869,
398
+ "step": 640
399
+ },
400
+ {
401
+ "grad_norm": 1.1084046363830566,
402
+ "learning_rate": 9.993931557921874e-05,
403
+ "loss": 0.2858,
404
+ "step": 650
405
+ },
406
+ {
407
+ "grad_norm": 0.9912689924240112,
408
+ "learning_rate": 9.993089861814402e-05,
409
+ "loss": 0.281,
410
+ "step": 660
411
+ },
412
+ {
413
+ "grad_norm": 1.1200611591339111,
414
+ "learning_rate": 9.992193562076166e-05,
415
+ "loss": 0.2603,
416
+ "step": 670
417
+ },
418
+ {
419
+ "grad_norm": 1.3195865154266357,
420
+ "learning_rate": 9.991242668508954e-05,
421
+ "loss": 0.2551,
422
+ "step": 680
423
+ },
424
+ {
425
+ "grad_norm": 1.0591305494308472,
426
+ "learning_rate": 9.990237191511587e-05,
427
+ "loss": 0.2296,
428
+ "step": 690
429
+ },
430
+ {
431
+ "grad_norm": 1.5685760974884033,
432
+ "learning_rate": 9.989177142079802e-05,
433
+ "loss": 0.2412,
434
+ "step": 700
435
+ },
436
+ {
437
+ "grad_norm": 0.9255881309509277,
438
+ "learning_rate": 9.988062531806126e-05,
439
+ "loss": 0.2232,
440
+ "step": 710
441
+ },
442
+ {
443
+ "grad_norm": 1.0331252813339233,
444
+ "learning_rate": 9.986893372879762e-05,
445
+ "loss": 0.2208,
446
+ "step": 720
447
+ },
448
+ {
449
+ "grad_norm": 1.0819493532180786,
450
+ "learning_rate": 9.985669678086443e-05,
451
+ "loss": 0.2124,
452
+ "step": 730
453
+ },
454
+ {
455
+ "grad_norm": 0.947820782661438,
456
+ "learning_rate": 9.984391460808298e-05,
457
+ "loss": 0.2011,
458
+ "step": 740
459
+ },
460
+ {
461
+ "grad_norm": 1.2813969850540161,
462
+ "learning_rate": 9.983058735023709e-05,
463
+ "loss": 0.204,
464
+ "step": 750
465
+ },
466
+ {
467
+ "grad_norm": 0.9526956081390381,
468
+ "learning_rate": 9.98167151530715e-05,
469
+ "loss": 0.2,
470
+ "step": 760
471
+ },
472
+ {
473
+ "grad_norm": 1.2552719116210938,
474
+ "learning_rate": 9.980229816829034e-05,
475
+ "loss": 0.1923,
476
+ "step": 770
477
+ },
478
+ {
479
+ "grad_norm": 1.0731266736984253,
480
+ "learning_rate": 9.978733655355544e-05,
481
+ "loss": 0.1874,
482
+ "step": 780
483
+ },
484
+ {
485
+ "grad_norm": 1.1821141242980957,
486
+ "learning_rate": 9.977183047248464e-05,
487
+ "loss": 0.1694,
488
+ "step": 790
489
+ },
490
+ {
491
+ "grad_norm": 1.3457095623016357,
492
+ "learning_rate": 9.975578009464992e-05,
493
+ "loss": 0.1903,
494
+ "step": 800
495
+ },
496
+ {
497
+ "grad_norm": 0.9491983652114868,
498
+ "learning_rate": 9.97391855955757e-05,
499
+ "loss": 0.1668,
500
+ "step": 810
501
+ },
502
+ {
503
+ "grad_norm": 1.257034182548523,
504
+ "learning_rate": 9.972204715673669e-05,
505
+ "loss": 0.1644,
506
+ "step": 820
507
+ },
508
+ {
509
+ "grad_norm": 1.0270410776138306,
510
+ "learning_rate": 9.970436496555617e-05,
511
+ "loss": 0.1504,
512
+ "step": 830
513
+ },
514
+ {
515
+ "grad_norm": 0.8814347386360168,
516
+ "learning_rate": 9.968613921540373e-05,
517
+ "loss": 0.1473,
518
+ "step": 840
519
+ },
520
+ {
521
+ "grad_norm": 1.0740413665771484,
522
+ "learning_rate": 9.966737010559326e-05,
523
+ "loss": 0.1602,
524
+ "step": 850
525
+ },
526
+ {
527
+ "grad_norm": 1.1755180358886719,
528
+ "learning_rate": 9.964805784138072e-05,
529
+ "loss": 0.1368,
530
+ "step": 860
531
+ },
532
+ {
533
+ "grad_norm": 1.1243144273757935,
534
+ "learning_rate": 9.962820263396195e-05,
535
+ "loss": 0.1473,
536
+ "step": 870
537
+ },
538
+ {
539
+ "grad_norm": 1.0385600328445435,
540
+ "learning_rate": 9.960780470047033e-05,
541
+ "loss": 0.1404,
542
+ "step": 880
543
+ },
544
+ {
545
+ "grad_norm": 0.8931643962860107,
546
+ "learning_rate": 9.958686426397437e-05,
547
+ "loss": 0.1312,
548
+ "step": 890
549
+ },
550
+ {
551
+ "grad_norm": 1.0461937189102173,
552
+ "learning_rate": 9.956538155347534e-05,
553
+ "loss": 0.1442,
554
+ "step": 900
555
+ },
556
+ {
557
+ "grad_norm": 1.0877867937088013,
558
+ "learning_rate": 9.95433568039047e-05,
559
+ "loss": 0.1284,
560
+ "step": 910
561
+ },
562
+ {
563
+ "grad_norm": 1.0202831029891968,
564
+ "learning_rate": 9.952079025612162e-05,
565
+ "loss": 0.1243,
566
+ "step": 920
567
+ },
568
+ {
569
+ "grad_norm": 1.262707233428955,
570
+ "learning_rate": 9.949768215691022e-05,
571
+ "loss": 0.1352,
572
+ "step": 930
573
+ },
574
+ {
575
+ "grad_norm": 1.084681749343872,
576
+ "learning_rate": 9.9474032758977e-05,
577
+ "loss": 0.1437,
578
+ "step": 940
579
+ },
580
+ {
581
+ "grad_norm": 0.9750522375106812,
582
+ "learning_rate": 9.944984232094794e-05,
583
+ "loss": 0.1335,
584
+ "step": 950
585
+ },
586
+ {
587
+ "grad_norm": 0.7702094912528992,
588
+ "learning_rate": 9.942511110736584e-05,
589
+ "loss": 0.127,
590
+ "step": 960
591
+ },
592
+ {
593
+ "grad_norm": 1.0232125520706177,
594
+ "learning_rate": 9.939983938868726e-05,
595
+ "loss": 0.1325,
596
+ "step": 970
597
+ },
598
+ {
599
+ "grad_norm": 0.9044125080108643,
600
+ "learning_rate": 9.93740274412797e-05,
601
+ "loss": 0.1276,
602
+ "step": 980
603
+ },
604
+ {
605
+ "grad_norm": 0.8179964423179626,
606
+ "learning_rate": 9.934767554741846e-05,
607
+ "loss": 0.1345,
608
+ "step": 990
609
+ },
610
+ {
611
+ "eval/loss": 0.1256265753507614,
612
+ "step": 1000
613
+ },
614
+ {
615
+ "grad_norm": 1.036224603652954,
616
+ "learning_rate": 9.932078399528361e-05,
617
+ "loss": 0.1203,
618
+ "step": 1000
619
+ }
620
+ ],
621
+ "logging_steps": 10,
622
+ "max_steps": 10000,
623
+ "num_input_tokens_seen": 0,
624
+ "num_train_epochs": 9223372036854775807,
625
+ "save_steps": 1000,
626
+ "stateful_callbacks": {
627
+ "TrainerControl": {
628
+ "args": {
629
+ "should_epoch_stop": false,
630
+ "should_evaluate": false,
631
+ "should_log": false,
632
+ "should_save": true,
633
+ "should_training_stop": false
634
+ },
635
+ "attributes": {}
636
+ }
637
+ },
638
+ "total_flos": 0.0,
639
+ "train_batch_size": 32,
640
+ "trial_name": null,
641
+ "trial_params": null
642
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89c21d8ba19741d330ef414ba8771884674852a767899792c728e670b3f431e5
3
+ size 5905
checkpoint-1000/wandb_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"project": "groot-wbc", "run_id": "run-2026-05-28-090411"}