# Experiment configuration: run/optimizer settings, DATASET, MODEL and
# EVALUATION sections. Values of the form ${...} are interpolations resolved
# by the config loader at load time.
#
# NOTE(review): this layout was reconstructed from a whitespace-collapsed copy
# of the file. Nesting that could not be derived from the ${...} references is
# marked with NOTE(review) below -- confirm against the consuming code.

# ---------------------------------------------------------------------------
# Run, paths and logging
# ---------------------------------------------------------------------------
TAG: debug
LOG_DIR: tensorboard_logs
seed: 7
vla_path: paligemma-3b-pt-224
data_root_dir: /galaxea_dataset/galaxea/pp_project/rlds_334_tasks_distributed/
dataset_name: bbox_training_r1_lite_5_parts
run_root_dir: runs/base
adapter_tmp_dir: adapter_tmp_weights
# Path to a file, not the token itself.
hf_token: /galaxea_fulltime/share/.hf_token
ckpt: /galaxea_fulltime/pretrained_ckpts/pi0_libero/pi0_torch_state.pt

# ---------------------------------------------------------------------------
# LoRA / quantization / precision
# lora_rank and lora_dropout are referenced from MODEL.*.lora below.
# ---------------------------------------------------------------------------
use_lora: false
lora_rank: 32
lora_dropout: 0.0
use_quantization: false
enable_bf16: true
model_param_to_bf16: false
vla_training_strategy: vla-full-train

# ---------------------------------------------------------------------------
# Optimization and schedule
# ---------------------------------------------------------------------------
weight_decay: 1.0e-06
batch_size: 4
grad_accumulation_steps: 1
learning_rate: 2.5e-05
warmup_steps: 500
lr_scheduler_type: cosine
image_aug: true
max_epochs: 8
save_steps: 23523
log_steps: 100
use_torch_compile: false

# ---------------------------------------------------------------------------
# Experiment tracking
# Names that start with digits are quoted so they can never be read as numbers.
# ---------------------------------------------------------------------------
wandb_project: '1101_pnp_rla_image_condition_376_tasks_5_parts'
wandb_entity: cuijianning1996-galaxea-ai
exp_name: '376_tasks_img_as_cond_with_randomly_rotated_bbox'

# ---------------------------------------------------------------------------
# EMA (disabled here via use_ema: false)
# ---------------------------------------------------------------------------
use_ema: false
ema:
  update_after_step: 0
  power: 0.67

# ---------------------------------------------------------------------------
# Dataset
# ---------------------------------------------------------------------------
DATASET:
  robot_cfg:
    with_left_arm: true
    with_right_arm: true
    with_torso: false
    with_chassis: false
    # NOTE(review): assumed to be part of robot_cfg; it may instead be a
    # direct child of DATASET -- confirm.
    use_relative_joint_action: true
  # window_size and camera_views feed MODEL.num_input_images.
  window_size: 1
  future_action_window_size: 31
  camera_views:
    - head_condition
    - head
    - wrist_left
    - wrist_right
  shuffle_buffer_size: 10000
  balance_weights: false
  use_last_action: false
  share_datasets_statistics: true
  short_prompt: true
  aug_instruction_kwargs:
    drop_high_level_prob: 1.0
    bbox_as_instruction: false
    image_condition: true
    image_condition_lang_prefix: 'Pick the object in the first image and place into the tableware.'
    # NOTE(review): assumed to be the last key of aug_instruction_kwargs --
    # confirm.
    bbox_jitter_ratio: 0.0
  action_proprio_normalization_type: normal
  use_pretrained_data_stats: false
  proprio_noise_std: 0.05
  # Per-camera augmentation settings. There is no entry for the
  # head_condition view.
  image_augment_kwargs:
    head:
      random_brightness:
        - 0.2
      random_contrast:
        - 0.8
        - 1.2
      random_saturation:
        - 0.8
        - 1.2
      random_hue:
        - 0.05
      augment_order:
        - random_brightness
        - random_contrast
        - random_saturation
        - random_hue
    wrist_left:
      random_brightness:
        - 0.2
      random_contrast:
        - 0.8
        - 1.2
      random_saturation:
        - 0.8
        - 1.2
      random_hue:
        - 0.05
      random_drop_all_image:
        - 0.3
      augment_order:
        - random_drop_all_image
        - random_brightness
        - random_contrast
        - random_saturation
        - random_hue
    wrist_right:
      random_brightness:
        - 0.2
      random_contrast:
        - 0.8
        - 1.2
      random_saturation:
        - 0.8
        - 1.2
      random_hue:
        - 0.05
      random_drop_all_image:
        - 0.3
      augment_order:
        - random_drop_all_image
        - random_brightness
        - random_contrast
        - random_saturation
        - random_hue

# NOTE(review): assumed to be a top-level key (it sits between the DATASET and
# MODEL sections) -- confirm.
model_family: galaxea_zero

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
MODEL:
  name: vla.galaxea_zero.GalaxeaZeroWrapper
  vla_name: "paligemma-3b-pt-224"
  load_inside: false
  pretrained_model_path: /galaxea_fulltime/pretrained_ckpts/cache/paligemma-3b-pt-224
  input_ids: true
  action_expert_only: false

  image_token_index: 257152
  vocab_size: 257216
  pad_token_id: 0

  cond_steps: 1  # len proprio
  horizon_steps: 32
  # 2 x [QPOS (6) + gripper (1)] + Torso Velocity (6) + Chassis Velocity (6)
  action_dim: 26
  # 2 * [QPOS (6) + gripper (1)] + 4 (torso) + 3 (base vel) + last action(26)
  # NOTE(review): 14 + 4 + 3 = 21 only without the "last action (26)" term;
  # presumably that term applies only when DATASET.use_last_action is true --
  # confirm.
  proprio_dim: 21
  max_text_tokens: 55  # 55 for galaxea0002
  # `eval` is not a built-in OmegaConf resolver, so the loading code must
  # register it. Assuming it evaluates the arithmetic, the values in this file
  # give 4 * 256 + 55 = 1079.
  max_seq_len: ${eval:'${MODEL.num_input_images} * ${MODEL.vision.num_image_tokens} + ${MODEL.max_text_tokens}'}
  max_image_text_tokens: ${MODEL.max_seq_len}  # = ${max_seq_len}
  action_decoder_layers: 2

  flow_sampling: beta
  num_inference_steps: 10
  final_action_clip_value: null  # data normalized in [-1,1]
  action_expert_adaptive_mode: null

  # DATASET.window_size * len(DATASET.camera_views); 1 * 4 = 4 with the values
  # in this file.
  num_input_images: ${eval:'${DATASET.window_size} * len(${DATASET.camera_views})'}

  vision:
    name: vla.model.paligemma.siglip.SiglipVisionModel
    hidden_size: 1152  # siglip
    intermediate_size: 4304
    num_hidden_layers: 27
    num_attention_heads: 16
    num_channels: 3
    image_size: 224
    patch_size: 14
    layer_norm_eps: 0.000001
    attention_dropout: 0.0
    num_image_tokens: 256  # matches (image_size / patch_size)^2 = 16^2
    lora:
      r: ${lora_rank}
      dropout: ${lora_dropout}
    use_quantize: false
    use_lora: false

  vision_projector:
    name: vla.model.paligemma.siglip.PaliGemmaMultiModalProjector
    vision_config:
      hidden_size: 1152
      projection_dim: 2048
    lora:
      r: ${lora_rank}
      dropout: ${lora_dropout}
    use_quantize: false
    use_lora: false

  joint:
    name: vla.model.g0.joint_model.JointModel
    action_expert_adaptive_mode: null
    mixture:
      vlm:  # gemma
        hidden_size: 2048
        intermediate_size: 16384
        use_final_norm: false
        cache: true
        use_quantize: false
        use_lora: false
        adaptive_mode: null  # not applicable for gemma
      proprio:
        hidden_size: 1024
        intermediate_size: 4096
        # technically no, but sharing weights with action anyway
        use_final_norm: true
        cache: true
        use_quantize: false
        use_lora: false
        adaptive_mode: null
      action:
        hidden_size: 1024
        intermediate_size: 4096
        use_final_norm: true
        cache: false
        use_quantize: false
        use_lora: false
        adaptive_mode: null
    time_hidden_size: 256  # only applicable if using adaptive
    lora:
      r: ${lora_rank}
      dropout: ${lora_dropout}
    num_hidden_layers: 18
    num_attention_heads: 8
    num_key_value_heads: 1
    head_dim: 256
    max_position_embeddings: 8192
    rms_norm_eps: 0.000001
    rope_theta: 10000.0
    attention_bias: false
    attention_dropout: 0.0
    pad_token_id: 0

################################################################################
# For evaluation
################################################################################
EVALUATION:
  checkpoint: null  # Pretrained checkpoint path
  load_in_8bit: false  # (For OpenVLA only) Load with 8-bit quantization
  load_in_4bit: false  # (For OpenVLA only) Load with 4-bit quantization

  center_crop: true  # Center crop? (if trained w/ random crop image aug)

  ##############################################################################
  # LIBERO environment-specific parameters
  ##############################################################################
  # Task suite. Options: libero_spatial, libero_object, libero_goal, libero_10,
  # libero_90
  # NOTE(review): the current value is not one of the options listed above --
  # confirm the evaluator accepts it.
  task_suite_name: "simpler_widowx"
  num_steps_wait: 10  # Number of steps to wait for objects to stabilize in sim
  num_trials_per_task: 24  # Number of rollouts per task
  use_wrist_image: false

  ##############################################################################
  # Utils
  ##############################################################################
  # Extra note to add in run ID for logging.
  # Written as `null`: the Python spelling `None` is loaded by YAML as the
  # string "None", not as an absent value.
  run_id_note: null
  local_log_dir: "./experiments/logs"  # Local directory for eval logs

  use_wandb: false  # Whether to also log results in Weights & Biases

  seed: 7  # Random Seed (for reproducibility)