| TAG: debug |
| LOG_DIR: tensorboard_logs |
| seed: 7 |
| vla_path: paligemma-3b-pt-224 |
| data_root_dir: /galaxea_dataset/galaxea/pp_project/rlds_334_tasks_distributed/ |
| dataset_name: bbox_training_r1_lite_5_parts |
| run_root_dir: runs/base |
| adapter_tmp_dir: adapter_tmp_weights |
| hf_token: /galaxea_fulltime/share/.hf_token |
| ckpt: /galaxea_fulltime/pretrained_ckpts/pi0_libero/pi0_torch_state.pt |
| use_lora: false |
| lora_rank: 32 |
| lora_dropout: 0.0 |
| use_quantization: false |
| enable_bf16: true |
| model_param_to_bf16: false |
| vla_training_strategy: vla-full-train |
| weight_decay: 1.0e-06 |
| batch_size: 4 |
| grad_accumulation_steps: 1 |
| learning_rate: 2.5e-05 |
| warmup_steps: 500 |
| lr_scheduler_type: cosine |
| image_aug: true |
| max_epochs: 8 |
| save_steps: 23523 |
| log_steps: 100 |
| use_torch_compile: false |
| wandb_project: 1101_pnp_rla_image_condition_376_tasks_5_parts |
| wandb_entity: cuijianning1996-galaxea-ai |
| exp_name: 376_tasks_img_as_cond_with_randomly_rotated_bbox |
| use_ema: false |
| ema: |
| update_after_step: 0 |
| power: 0.67 |
| DATASET: |
| robot_cfg: |
| with_left_arm: true |
| with_right_arm: true |
| with_torso: false |
| with_chassis: false |
| use_relative_joint_action: true |
| window_size: 1 |
| future_action_window_size: 31 |
| camera_views: |
| - head_condition |
| - head |
| - wrist_left |
| - wrist_right |
| shuffle_buffer_size: 10000 |
| balance_weights: false |
| use_last_action: false |
| share_datasets_statistics: true |
| short_prompt: true |
| aug_instruction_kwargs: |
| drop_high_level_prob: 1.0 |
| bbox_as_instruction: false |
| image_condition: true |
| image_condition_lang_prefix: Pick the object in the first image and place into |
| the tableware. |
| bbox_jitter_ratio: 0.0 |
| action_proprio_normalization_type: normal |
| use_pretrained_data_stats: false |
| proprio_noise_std: 0.05 |
| image_augment_kwargs: |
| head: |
| random_brightness: |
| - 0.2 |
| random_contrast: |
| - 0.8 |
| - 1.2 |
| random_saturation: |
| - 0.8 |
| - 1.2 |
| random_hue: |
| - 0.05 |
| augment_order: |
| - random_brightness |
| - random_contrast |
| - random_saturation |
| - random_hue |
| wrist_left: |
| random_brightness: |
| - 0.2 |
| random_contrast: |
| - 0.8 |
| - 1.2 |
| random_saturation: |
| - 0.8 |
| - 1.2 |
| random_hue: |
| - 0.05 |
| random_drop_all_image: |
| - 0.3 |
| augment_order: |
| - random_drop_all_image |
| - random_brightness |
| - random_contrast |
| - random_saturation |
| - random_hue |
| wrist_right: |
| random_brightness: |
| - 0.2 |
| random_contrast: |
| - 0.8 |
| - 1.2 |
| random_saturation: |
| - 0.8 |
| - 1.2 |
| random_hue: |
| - 0.05 |
| random_drop_all_image: |
| - 0.3 |
| augment_order: |
| - random_drop_all_image |
| - random_brightness |
| - random_contrast |
| - random_saturation |
| - random_hue |
| model_family: galaxea_zero |
| MODEL: |
| name: vla.galaxea_zero.GalaxeaZeroWrapper |
| vla_name: "paligemma-3b-pt-224" |
| load_inside: False |
| pretrained_model_path: /galaxea_fulltime/pretrained_ckpts/cache/paligemma-3b-pt-224 |
| input_ids: True |
| action_expert_only: False |
| image_token_index: 257152 |
| vocab_size: 257216 |
| pad_token_id: 0 |
| cond_steps: 1 |
| horizon_steps: 32 |
| action_dim: 26 |
| proprio_dim: 21 |
| max_text_tokens: 55 |
| max_seq_len: ${eval:'${MODEL.num_input_images} * ${MODEL.vision.num_image_tokens} + ${MODEL.max_text_tokens}'} |
| max_image_text_tokens: ${MODEL.max_seq_len} |
| action_decoder_layers: 2 |
| flow_sampling: beta |
| num_inference_steps: 10 |
| final_action_clip_value: null |
| action_expert_adaptive_mode: null |
| num_input_images: ${eval:'${DATASET.window_size} * len(${DATASET.camera_views})'} |
| vision: |
| name: vla.model.paligemma.siglip.SiglipVisionModel |
| hidden_size: 1152 |
| intermediate_size: 4304 |
| num_hidden_layers: 27 |
| num_attention_heads: 16 |
| num_channels: 3 |
| image_size: 224 |
| patch_size: 14 |
| layer_norm_eps: 0.000001 |
| attention_dropout: 0.0 |
| num_image_tokens: 256 |
| lora: |
| r: ${lora_rank} |
| dropout: ${lora_dropout} |
| use_quantize: False |
| use_lora: False |
| vision_projector: |
| name: vla.model.paligemma.siglip.PaliGemmaMultiModalProjector |
| vision_config: |
| hidden_size: 1152 |
| projection_dim: 2048 |
| lora: |
| r: ${lora_rank} |
| dropout: ${lora_dropout} |
| use_quantize: False |
| use_lora: False |
| joint: |
| name: vla.model.g0.joint_model.JointModel |
| action_expert_adaptive_mode: null |
| mixture: |
| vlm: |
| hidden_size: 2048 |
| intermediate_size: 16384 |
| use_final_norm: False |
| cache: True |
| use_quantize: False |
| use_lora: False |
| adaptive_mode: |
| proprio: |
| hidden_size: 1024 |
| intermediate_size: 4096 |
| use_final_norm: True |
| cache: True |
| use_quantize: False |
| use_lora: False |
| adaptive_mode: null |
| action: |
| hidden_size: 1024 |
| intermediate_size: 4096 |
| use_final_norm: True |
| cache: False |
| use_quantize: False |
| use_lora: False |
| adaptive_mode: null |
| time_hidden_size: 256 |
| lora: |
| r: ${lora_rank} |
| dropout: ${lora_dropout} |
| num_hidden_layers: 18 |
| num_attention_heads: 8 |
| num_key_value_heads: 1 |
| head_dim: 256 |
| max_position_embeddings: 8192 |
| rms_norm_eps: 0.000001 |
| rope_theta: 10000.0 |
| attention_bias: False |
| attention_dropout: 0.0 |
| pad_token_id: 0 |
| |
| |
| |
| EVALUATION: |
| checkpoint: null |
| load_in_8bit: False |
| load_in_4bit: False |
| center_crop: True |
| |
| |
| |
| task_suite_name: "simpler_widowx" |
| num_steps_wait: 10 |
| num_trials_per_task: 24 |
| use_wrist_image: False |
| |
| |
| |
| run_id_note: None |
| local_log_dir: "./experiments/logs" |
| use_wandb: False |
| seed: 7 |
|
|