# Training / evaluation configuration (Hydra + OmegaConf interpolation syntax).
#
# NOTE(review): this file had lost its line breaks and indentation. The nesting
# below was reconstructed from the interpolation paths used inside the file
# itself (e.g. ${data.dataset.obs_size}, ${model.model_arch.vision.image_size}).
# Keys whose parent could not be pinned down that way carry a "NOTE(review)".
# `split`, `sum_shapes` and `eval` are not OmegaConf built-ins; presumably they
# are custom resolvers registered by the project -- TODO confirm.

# --- Run-level settings ---
seed: 7
resume_ckpt: null
output_dir: ${hydra:runtime.output_dir}
dataset_stats_cache_dir: ${oc.env:GALAXEA_FM_DATASET_STATS_CACHE_DIR}
checkpointing_steps: 5000

# --- Experiment logger ---
# `project` / `experiment_name` are the first / last element produced by the
# `split` resolver applied to the selected task name.
logger:
  type: swanlab
  log_steps: 10
  task: ${hydra:runtime.choices.task}
  project: ${split:${logger.task},0}
  experiment_name: ${split:${logger.task},-1}
  mode: cloud
  workspace: Galaxea-AI
  dir: null

# --- Validation / evaluation settings ---
# NOTE(review): assumed to be top-level keys rather than children of `logger`;
# `tags` must be top-level because `edp.tags` reads ${tags}. TODO confirm the rest.
batch_size_val: 16
eval_episodes_num: 1
ckpt_path: null
env: R1ProBlocksStackEasy
target_controller_type: bimanual_relaxed_ik
tags: null

# --- `edp` metadata block ---
# Mirrors several values from elsewhere in this config through interpolation.
edp:
  card: null
  training_time: ${now:%Y-%m-%d}_${now:%H-%M-%S}
  git_branch: null
  git_commit: null
  root: null
  repo_ids: null
  save_dir: ${output_dir}
  tags: ${tags}
  max_steps: ${model.max_steps}
  batch_size: ${model.batch_size}

# --- LIBERO evaluation settings ---
libero_eval:
  task_suite_names:
    - libero_10
    - libero_spatial
    - libero_object
    - libero_goal
  num_steps_wait: 10
  replan_steps: 5
  num_trials: 50
  output_dir: ${output_dir}
  run_id_note: null
  env_num: 50

# --- Data pipeline ---
data:
  dataset:
    _target_: galaxea_fm.data.galaxea_lerobot_dataset.GalaxeaLerobotDataset
    # Placeholder path; replace with the real dataset directories.
    dataset_dirs:
      - /To/Your/Path
    # Per-key shapes for actions, proprioceptive state and camera images.
    shape_meta:
      action:
        - key: left_arm
          raw_shape: 6
          shape: 6
        - key: left_gripper
          raw_shape: 1
          shape: 1
        - key: right_arm
          raw_shape: 6
          shape: 6
        - key: right_gripper
          raw_shape: 1
          shape: 1
        - key: torso.velocities
          raw_shape: 6
          shape: 6
        - key: chassis.velocities
          raw_shape: 6
          shape: 6
      state:
        - key: left_arm
          raw_shape: 6
          shape: 6
        - key: left_gripper
          raw_shape: 1
          shape: 1
        - key: right_arm
          raw_shape: 6
          shape: 6
        - key: right_gripper
          raw_shape: 1
          shape: 1
        - key: torso
          raw_shape: 4
          shape: 4
        - key: chassis
          raw_shape: 3
          shape: 3
      # The last two entries of each image `shape` follow
      # model.model_meta.input_image_size.
      images:
        - key: head_rgb
          raw_shape: [3, 720, 1280]
          shape:
            - 3
            - ${model.model_meta.input_image_size.0}
            - ${model.model_meta.input_image_size.1}
        - key: left_wrist_rgb
          raw_shape: [3, 720, 1280]
          shape:
            - 3
            - ${model.model_meta.input_image_size.0}
            - ${model.model_meta.input_image_size.1}
        - key: right_wrist_rgb
          raw_shape: [3, 720, 1280]
          shape:
            - 3
            - ${model.model_meta.input_image_size.0}
            - ${model.model_meta.input_image_size.1}
    action_size: 32
    past_action_size: 0
    obs_size: 1
    # NOTE(review): the next two keys are assumed to belong to `dataset` (they
    # could also be direct children of `data`) -- TODO confirm.
    ee_start_moving_thresh: 0.0
    val_set_proportion: 0.05

  processor:
    _target_: galaxea_fm.processors.base_processor.BaseProcessor
    shape_meta: ${data.dataset.shape_meta}
    num_obs_steps: ${data.dataset.obs_size}
    num_output_cameras: 3
    action_output_dim: ${sum_shapes:${data.dataset.shape_meta.action}}
    proprio_output_dim: ${sum_shapes:${data.dataset.shape_meta.state}}
    action_state_transforms:
      - _target_: galaxea_fm.transforms.relative_action.RelativeJointTransform
        keys:
          - left_arm
          - right_arm
      - _target_: galaxea_fm.transforms.misc.WrapStateAngle
        keys:
          - chassis
    use_stepwise_action_norm: true
    norm_default_mode: ${model.model_meta.norm_default_mode}
    # Per-key overrides of the default normalisation mode.
    norm_exception_mode:
      action:
        left_gripper: 0/100
        right_gripper: 0/100
    action_state_merger:
      _target_: galaxea_fm.transforms.action_state_merger.ConcatLeftAlign
    # Both wrist cameras reuse the head camera pipeline via interpolation.
    train_transforms:
      head_rgb:
        - _target_: torchvision.transforms.Resize
          size: ${model.model_meta.input_image_size}
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
      left_wrist_rgb: ${data.processor.train_transforms.head_rgb}
      right_wrist_rgb: ${data.processor.train_transforms.head_rgb}
    val_transforms:
      head_rgb:
        - _target_: torchvision.transforms.Resize
          size: ${model.model_meta.input_image_size}
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
      left_wrist_rgb: ${data.processor.val_transforms.head_rgb}
      right_wrist_rgb: ${data.processor.val_transforms.head_rgb}
    # NOTE(review): the next three keys are assumed to belong to `processor`
    # -- TODO confirm.
    drop_high_level_prob: 1.0
    use_zh_instruction: false
    tokenizer: ${model.tokenizer}

# --- Model, optimisation and architecture ---
model:
  # Was a bare empty value in the source; written as an explicit null (same
  # parsed value) so it cannot be misread as the parent of the following key.
  pretrained_ckpt: null
  use_pretrained_norm_stats: true
  model_weights_to_bf16: false
  enable_bf16_training: true
  use_torch_compile: false
  find_unused_parameters: false

  # Data loading and schedule.
  batch_size: 8
  num_workers: 4
  pin_memory: true
  persistent_workers: true
  max_epochs: 10
  max_steps: null
  grad_accumulation_steps: 1

  # Optimiser and learning-rate schedule.
  use_8bit_optimizer: false
  learning_rate: 0.00012
  weight_decay: 0.0001
  betas: [0.9, 0.95]
  lr_scheduler_type: cosine
  warmup_steps: 5000
  max_grad_norm: 1.0
  use_ema: false
  ema:
    update_after_step: 0
    power: 0.67
  # NOTE(review): assumed to be a child of `model`, not of `ema` -- TODO confirm.
  use_sync_bn: false

  tokenizer:
    _target_: galaxea_fm.models.galaxea_zero.paligemma.tokenizer.PaliGemmaTokenizer
    tokenizer_params:
      pretrained_model_name_or_path: /efm-nas/efm-nas/efm-shared/pretrained_model/google/paligemma-3b-pt-224
      local_files_only: true
    # NOTE(review): the keys below are assumed to be siblings of
    # `tokenizer_params` rather than nested inside it -- TODO confirm.
    pad_token_id: ${model.model_arch.pad_token_id}
    image_token_index: ${model.model_arch.image_token_index}
    max_text_tokens: ${model.model_arch.max_text_tokens}
    num_tokens_per_image: ${model.model_arch.vision.num_image_tokens}
    num_input_images: ${model.model_arch.num_input_images}

  model_arch:
    _target_: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZeroPolicy
    model_name: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZero
    pretrained_model_path: /efm-nas/efm-nas/efm-shared/pretrained_model/google/paligemma-3b-pt-224
    vla_training_strategy: vla-full-train
    backbone_lr_multiplier: 1.0
    image_token_index: 257152
    pad_token_id: 0
    vocab_size: 257216
    fill_padded_with_token: false
    embed_token_key_prefix: language_model.model.embed_tokens
    cond_steps: ${data.dataset.obs_size}
    horizon_steps: ${data.dataset.action_size}
    max_text_tokens: 55
    # cond_steps * number of cameras emitted by the processor.
    num_input_images: ${eval:'${model.model_arch.cond_steps} * ${data.processor.num_output_cameras}'}
    # num_input_images * tokens per image + max_text_tokens.
    max_image_text_tokens: ${eval:'${model.model_arch.num_input_images} * ${model.model_arch.vision.num_image_tokens} + ${model.model_arch.max_text_tokens}'}
    final_action_clip_value: null
    action_dim: ${data.processor.action_output_dim}
    proprio_dim: ${data.processor.proprio_output_dim}
    action_decoder_layers: 2
    action_expert_adaptive_mode: null
    flow_sampling: beta
    num_inference_steps: 10

    vision:
      name: galaxea_fm.models.galaxea_zero.paligemma.siglip.SiglipVisionModel
      key_prefix: vision_tower
      hidden_size: 1152
      intermediate_size: 4304
      num_hidden_layers: 27
      num_attention_heads: 16
      num_channels: 3
      image_size: 224
      patch_size: 14
      layer_norm_eps: 1.0e-06
      attention_dropout: 0.0
      num_image_tokens: 256

    vision_projector:
      name: galaxea_fm.models.galaxea_zero.paligemma.siglip.PaliGemmaMultiModalProjector
      key_prefix: multi_modal_projector
      # NOTE(review): `projection_dim` is assumed to sit inside `vision_config`
      # next to `hidden_size` -- TODO confirm.
      vision_config:
        hidden_size: 1152
        projection_dim: 2048

    joint:
      name: galaxea_fm.models.galaxea_zero.joint_model.JointModel
      key_prefix: language_model.model
      action_expert_adaptive_mode: null
      module_names:
        mlp: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaMLP
        norm: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaRMSNorm
        rope: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaRotaryEmbedding
      mixture:
        vlm:
          hidden_size: 2048
          intermediate_size: 16384
          use_final_norm: false
          cache: true
        proprio:
          hidden_size: 1024
          intermediate_size: 4096
          use_final_norm: true
          cache: true
          adaptive_mode: null
        action:
          hidden_size: 1024
          intermediate_size: 4096
          use_final_norm: true
          cache: false
          adaptive_mode: null
      # NOTE(review): the remaining keys are assumed to be children of `joint`
      # (shared across the mixture entries), not of `mixture.action`
      # -- TODO confirm.
      time_hidden_size: 256
      num_hidden_layers: 18
      num_attention_heads: 8
      num_key_value_heads: 1
      head_dim: 256
      max_position_embeddings: 8192
      rms_norm_eps: 1.0e-06
      rope_theta: 10000.0
      attention_bias: false
      attention_dropout: 0.0

  model_meta:
    norm_default_mode: z-score
    # Both entries follow the vision tower's image_size.
    input_image_size:
      - ${model.model_arch.vision.image_size}
      - ${model.model_arch.vision.image_size}
    # NOTE(review): the value is empty in the visible source, and the parent
    # (`model_meta`) is an assumption. Left empty in case the file continues
    # past this point -- TODO confirm, and write `null` explicitly if it is
    # meant to be unset.
    pretrained_dataset_stats: