Robotics
ONNX
English
Chinese
real-world
dual-arm
whole body control
manipulation
G0-VLA / G0_Plus_P&P /config.yaml
zjt2656404169's picture
Upload 3 files
77e28ab verified
Raw
History Blame
7.53 kB
# Top-level settings for a training run.
# NOTE(review): this copy of the file carries no indentation, so nesting is not
# visible here; every key is kept exactly as found.

# Run tag, log directory and random seed.
TAG: debug
LOG_DIR: tensorboard_logs
seed: 7
# Base model identifier, dataset location/name and output directories.
vla_path: paligemma-3b-pt-224
data_root_dir: /galaxea_dataset/galaxea/pp_project/rlds_334_tasks_distributed/
dataset_name: bbox_training_r1_lite_5_parts
run_root_dir: runs/base
adapter_tmp_dir: adapter_tmp_weights
# The value is a filesystem path, not a token string.
hf_token: /galaxea_fulltime/share/.hf_token
# Path of the checkpoint file.
ckpt: /galaxea_fulltime/pretrained_ckpts/pi0_libero/pi0_torch_state.pt
# LoRA switches. lora_rank and lora_dropout are referenced further down in this
# file through ${lora_rank} and ${lora_dropout}.
use_lora: false
lora_rank: 32
lora_dropout: 0.0
# Quantization / precision flags and the training-strategy name.
use_quantization: false
enable_bf16: true
model_param_to_bf16: false
vla_training_strategy: vla-full-train
# Optimiser and learning-rate schedule hyper-parameters.
weight_decay: 1.0e-06
batch_size: 4
grad_accumulation_steps: 1
learning_rate: 2.5e-05
warmup_steps: 500
lr_scheduler_type: cosine
# Augmentation switch, number of epochs, save/log step intervals, compile flag.
image_aug: true
max_epochs: 8
save_steps: 23523
log_steps: 100
use_torch_compile: false
# Weights & Biases naming.
# NOTE(review): wandb_project and exp_name mention 376 tasks while data_root_dir
# mentions 334 tasks — confirm which count is correct for this run.
wandb_project: 1101_pnp_rla_image_condition_376_tasks_5_parts
wandb_entity: cuijianning1996-galaxea-ai
exp_name: 376_tasks_img_as_cond_with_randomly_rotated_bbox
# EMA settings; use_ema is off. `ema:` has no inline value, so update_after_step
# and power are presumably its children — confirm against the indented file.
use_ema: false
ema:
update_after_step: 0
power: 0.67
# Dataset / data-loading settings.
# NOTE(review): indentation is missing in this copy, so it is not visible where
# robot_cfg and aug_instruction_kwargs end — confirm against the indented file.
DATASET:
# Robot part switches: both arms enabled, torso and chassis disabled.
robot_cfg:
with_left_arm: true
with_right_arm: true
with_torso: false
with_chassis: false
use_relative_joint_action: true
# window_size + future_action_window_size = 1 + 31 = 32, the same number as
# MODEL horizon_steps — presumably related; confirm.
window_size: 1
future_action_window_size: 31
# Four camera views. MODEL.num_input_images is computed from
# window_size * len(camera_views).
camera_views:
- head_condition
- head
- wrist_left
- wrist_right
shuffle_buffer_size: 10000
balance_weights: false
use_last_action: false
share_datasets_statistics: true
short_prompt: true
# Instruction-augmentation options: bbox-as-instruction is off, image
# conditioning is on.
aug_instruction_kwargs:
drop_high_level_prob: 1.0
bbox_as_instruction: false
image_condition: true
# Instruction prefix text. Written as one single-quoted scalar so the value does
# not depend on how a wrapped continuation line is indented; the string is the
# same one the folded two-line plain scalar produces.
image_condition_lang_prefix: 'Pick the object in the first image and place into the tableware.'
# Bounding-box jitter ratio, normalisation type for action/proprio, whether to
# use pretrained data statistics, and the proprio noise standard deviation.
# NOTE(review): units/semantics of these values are not shown in this file.
bbox_jitter_ratio: 0.0
action_proprio_normalization_type: normal
use_pretrained_data_stats: false
proprio_noise_std: 0.05
# Per-camera image augmentation parameters for head, wrist_left and wrist_right.
# NOTE(review): camera_views also lists head_condition, which has no entry here —
# presumably that view is not augmented; confirm.
# Brightness and hue take a single number, contrast and saturation take two
# numbers (presumably a max delta vs. a [lower, upper] range — confirm).
image_augment_kwargs:
# Head camera: colour augmentations only.
head:
random_brightness:
- 0.2
random_contrast:
- 0.8
- 1.2
random_saturation:
- 0.8
- 1.2
random_hue:
- 0.05
# Order in which the augmentations above are listed for application.
augment_order:
- random_brightness
- random_contrast
- random_saturation
- random_hue
# Left wrist camera: same colour values as head, plus random_drop_all_image
# (0.3), which comes first in augment_order.
wrist_left:
random_brightness:
- 0.2
random_contrast:
- 0.8
- 1.2
random_saturation:
- 0.8
- 1.2
random_hue:
- 0.05
random_drop_all_image:
- 0.3
augment_order:
- random_drop_all_image
- random_brightness
- random_contrast
- random_saturation
- random_hue
# Right wrist camera: identical values to wrist_left.
wrist_right:
random_brightness:
- 0.2
random_contrast:
- 0.8
- 1.2
random_saturation:
- 0.8
- 1.2
random_hue:
- 0.05
random_drop_all_image:
- 0.3
augment_order:
- random_drop_all_image
- random_brightness
- random_contrast
- random_saturation
- random_hue
# NOTE(review): indentation is missing in this copy, so it is not visible whether
# model_family is a top-level key or belongs to the preceding section — confirm.
model_family: galaxea_zero
# Model settings. Values written as ${...} reference other keys of this file;
# ${eval:'...'} values hold an expression to be evaluated (presumably through a
# resolver registered by the config loader — confirm).
MODEL:
name: vla.galaxea_zero.GalaxeaZeroWrapper
vla_name: "paligemma-3b-pt-224"
load_inside: False
pretrained_model_path: /galaxea_fulltime/pretrained_ckpts/cache/paligemma-3b-pt-224
input_ids: True
action_expert_only: False
# Token-index constants.
image_token_index: 257152
vocab_size: 257216
pad_token_id: 0
cond_steps: 1 # len proprio
horizon_steps: 32
action_dim: 26 # 2 x [QPOS (6) + gripper (1)] + Torso Velocity (6) + Chassis Velocity (6)
# NOTE(review): the terms in the comment below sum to 21 only without
# "last action(26)" (14 + 4 + 3 = 21; with it the sum would be 47). Presumably
# the last action is excluded because DATASET use_last_action is false — confirm.
proprio_dim: 21 # 2 * [QPOS (6) + gripper (1)] + 4 (torso) + 3 (base vel) + last action(26)
max_text_tokens: 55 # 55 for galaxea0002
# With the values in this file (window_size 1, four camera_views,
# num_image_tokens 256, max_text_tokens 55) this is 4 * 256 + 55 = 1079.
max_seq_len: ${eval:'${MODEL.num_input_images} * ${MODEL.vision.num_image_tokens} + ${MODEL.max_text_tokens}'}
max_image_text_tokens: ${MODEL.max_seq_len} # = ${max_seq_len}
action_decoder_layers: 2
flow_sampling: beta
num_inference_steps: 10
final_action_clip_value: null # data normalized in [-1,1]
action_expert_adaptive_mode: null
# With window_size 1 and four camera_views this is 1 * 4 = 4.
num_input_images: ${eval:'${DATASET.window_size} * len(${DATASET.camera_views})'} # $DATASET.window_size * LEN($DATASET.camera_views)
# Vision tower settings; `name` gives the dotted path of the class to use.
vision:
name: vla.model.paligemma.siglip.SiglipVisionModel
hidden_size: 1152 # siglip
intermediate_size: 4304
num_hidden_layers: 27
num_attention_heads: 16
num_channels: 3
image_size: 224
patch_size: 14
layer_norm_eps: 0.000001
attention_dropout: 0.0
# 224 / 14 = 16 patches per side and 16 * 16 = 256, matching this value.
num_image_tokens: 256
# LoRA hyper-parameters are taken from lora_rank / lora_dropout defined near the
# top of the file.
lora:
r: ${lora_rank}
dropout: ${lora_dropout}
# NOTE(review): these are literal values, not interpolations of the use_lora /
# use_quantization keys near the top of the file; the two places currently agree
# (all false) but are not linked — confirm this is intended.
use_quantize: False
use_lora: False
# Multi-modal projector settings; `name` gives the dotted path of the class.
vision_projector:
name: vla.model.paligemma.siglip.PaliGemmaMultiModalProjector
# hidden_size 1152 equals the vision hidden_size; projection_dim 2048 equals the
# vlm hidden_size listed under the joint model's mixture.
vision_config:
hidden_size: 1152
projection_dim: 2048
# LoRA hyper-parameters are taken from lora_rank / lora_dropout defined near the
# top of the file.
lora:
r: ${lora_rank}
dropout: ${lora_dropout}
# Literal values; not linked to the use_lora / use_quantization keys near the
# top of the file.
use_quantize: False
use_lora: False
# Joint model settings; `name` gives the dotted path of the class.
joint:
name: vla.model.g0.joint_model.JointModel
action_expert_adaptive_mode: null
# The entries that follow (vlm, proprio, action) each list the same set of keys:
# hidden_size, intermediate_size, use_final_norm, cache, use_quantize, use_lora
# and adaptive_mode.
mixture:
# Mixture entry for the VLM (gemma) component.
vlm: # gemma
hidden_size: 2048
intermediate_size: 16384
use_final_norm: False
cache: True
use_quantize: False
use_lora: False
# Explicit null instead of a bare empty value: the parsed value is the same, and
# it matches how the proprio and action entries spell adaptive_mode.
adaptive_mode: null # not applicable for gemma
# Mixture entry for the proprio component. Sizes (1024 / 4096) are identical to
# the action entry below; cache is True here and False for action.
proprio:
hidden_size: 1024
intermediate_size: 4096
use_final_norm: True # technically no, but sharing weights with action anyway
cache: True
use_quantize: False
use_lora: False
adaptive_mode: null
# Mixture entry for the action component.
action:
hidden_size: 1024
intermediate_size: 4096
use_final_norm: True
cache: False
use_quantize: False
use_lora: False
adaptive_mode: null
# Remaining joint-model settings.
# NOTE(review): indentation is missing in this copy; these keys presumably sit at
# the joint level rather than inside the action entry — confirm.
time_hidden_size: 256 # only applicable if using adaptive
# LoRA hyper-parameters are taken from lora_rank / lora_dropout defined near the
# top of the file.
lora:
r: ${lora_rank}
dropout: ${lora_dropout}
# Transformer dimensions. 8 attention heads * head_dim 256 = 2048, the same
# number as the vlm hidden_size.
num_hidden_layers: 18
num_attention_heads: 8
num_key_value_heads: 1
head_dim: 256
max_position_embeddings: 8192
rms_norm_eps: 0.000001
rope_theta: 10000.0
attention_bias: False
attention_dropout: 0.0
# Same value as the pad_token_id listed under MODEL.
pad_token_id: 0
#################################################################################################################
# For evaluation
#################################################################################################################
# Evaluation settings.
# NOTE(review): indentation is missing in this copy; the keys below (including
# this section's own `seed`) presumably nest under EVALUATION — confirm.
EVALUATION:
checkpoint: null # Pretrained checkpoint path
load_in_8bit: False # (For OpenVLA only) Load with 8-bit quantization
load_in_4bit: False # (For OpenVLA only) Load with 4-bit quantization
center_crop: True # Center crop? (if trained w/ random crop image aug)
#################################################################################################################
# LIBERO environment-specific parameters
#################################################################################################################
# NOTE(review): the configured value "simpler_widowx" is not one of the LIBERO
# suites named in the inline comment — confirm the evaluator accepts it.
task_suite_name: "simpler_widowx" # Task suite. LIBERO options: libero_spatial, libero_object, libero_goal, libero_10, libero_90
num_steps_wait: 10 # Number of steps to wait for objects to stabilize in sim
num_trials_per_task: 24 # Number of rollouts per task
use_wrist_image: False
#################################################################################################################
# Utils
#################################################################################################################
# `null`, not `None`: YAML has no `None` literal, so a bare None is loaded as the
# string "None" instead of an absent note.
run_id_note: null # Extra note to add in run ID for logging
local_log_dir: "./experiments/logs" # Local directory for eval logs
use_wandb: False # Whether to also log results in Weights & Biases
seed: 7 # Random Seed (for reproducibility)