# Upload 3 files

# 77e28ab verified 7 months ago

# 7.53 kB

	TAG: debug
	LOG_DIR: tensorboard_logs
	seed: 7
	vla_path: paligemma-3b-pt-224
	data_root_dir: /galaxea_dataset/galaxea/pp_project/rlds_334_tasks_distributed/
	dataset_name: bbox_training_r1_lite_5_parts
	run_root_dir: runs/base
	adapter_tmp_dir: adapter_tmp_weights
	hf_token: /galaxea_fulltime/share/.hf_token
	ckpt: /galaxea_fulltime/pretrained_ckpts/pi0_libero/pi0_torch_state.pt
	use_lora: false
	lora_rank: 32
	lora_dropout: 0.0
	use_quantization: false
	enable_bf16: true
	model_param_to_bf16: false
	vla_training_strategy: vla-full-train
	weight_decay: 1.0e-06
	batch_size: 4
	grad_accumulation_steps: 1
	learning_rate: 2.5e-05
	warmup_steps: 500
	lr_scheduler_type: cosine
	image_aug: true
	max_epochs: 8
	save_steps: 23523
	log_steps: 100
	use_torch_compile: false
	wandb_project: 1101_pnp_rla_image_condition_376_tasks_5_parts
	wandb_entity: cuijianning1996-galaxea-ai
	exp_name: 376_tasks_img_as_cond_with_randomly_rotated_bbox
	use_ema: false
	ema:
	update_after_step: 0
	power: 0.67
	DATASET:
	robot_cfg:
	with_left_arm: true
	with_right_arm: true
	with_torso: false
	with_chassis: false
	use_relative_joint_action: true
	window_size: 1
	future_action_window_size: 31
	camera_views:
	- head_condition
	- head
	- wrist_left
	- wrist_right
	shuffle_buffer_size: 10000
	balance_weights: false
	use_last_action: false
	share_datasets_statistics: true
	short_prompt: true
	aug_instruction_kwargs:
	drop_high_level_prob: 1.0
	bbox_as_instruction: false
	image_condition: true
	image_condition_lang_prefix: Pick the object in the first image and place into
	the tableware.
	bbox_jitter_ratio: 0.0
	action_proprio_normalization_type: normal
	use_pretrained_data_stats: false
	proprio_noise_std: 0.05
	image_augment_kwargs:
	head:
	random_brightness:
	- 0.2
	random_contrast:
	- 0.8
	- 1.2
	random_saturation:
	- 0.8
	- 1.2
	random_hue:
	- 0.05
	augment_order:
	- random_brightness
	- random_contrast
	- random_saturation
	- random_hue
	wrist_left:
	random_brightness:
	- 0.2
	random_contrast:
	- 0.8
	- 1.2
	random_saturation:
	- 0.8
	- 1.2
	random_hue:
	- 0.05
	random_drop_all_image:
	- 0.3
	augment_order:
	- random_drop_all_image
	- random_brightness
	- random_contrast
	- random_saturation
	- random_hue
	wrist_right:
	random_brightness:
	- 0.2
	random_contrast:
	- 0.8
	- 1.2
	random_saturation:
	- 0.8
	- 1.2
	random_hue:
	- 0.05
	random_drop_all_image:
	- 0.3
	augment_order:
	- random_drop_all_image
	- random_brightness
	- random_contrast
	- random_saturation
	- random_hue
	model_family: galaxea_zero
	MODEL:
	name: vla.galaxea_zero.GalaxeaZeroWrapper
	vla_name: "paligemma-3b-pt-224"
	load_inside: False
	pretrained_model_path: /galaxea_fulltime/pretrained_ckpts/cache/paligemma-3b-pt-224
	input_ids: True
	action_expert_only: False
	image_token_index: 257152
	vocab_size: 257216
	pad_token_id: 0
	cond_steps: 1 # len proprio
	horizon_steps: 32
	action_dim: 26 # 2 x [QPOS (6) + gripper (1)] + Torso Velocity (6) + Chassis Velocity (6)
	proprio_dim: 21 # 2 * [QPOS (6) + gripper (1)] + 4 (torso) + 3 (base vel) + last action(26)
	max_text_tokens: 55 # 55 for galaxea0002
	max_seq_len: ${eval:'${MODEL.num_input_images} * ${MODEL.vision.num_image_tokens} + ${MODEL.max_text_tokens}'}
	max_image_text_tokens: ${MODEL.max_seq_len} # = ${max_seq_len}
	action_decoder_layers: 2
	flow_sampling: beta
	num_inference_steps: 10
	final_action_clip_value: null # data normalized in [-1,1]
	action_expert_adaptive_mode: null
	num_input_images: ${eval:'${DATASET.window_size} * len(${DATASET.camera_views})'} # $DATASET.window_size * LEN($DATASET.camera_views)
	vision:
	name: vla.model.paligemma.siglip.SiglipVisionModel
	hidden_size: 1152 # siglip
	intermediate_size: 4304
	num_hidden_layers: 27
	num_attention_heads: 16
	num_channels: 3
	image_size: 224
	patch_size: 14
	layer_norm_eps: 0.000001
	attention_dropout: 0.0
	num_image_tokens: 256
	lora:
	r: ${lora_rank}
	dropout: ${lora_dropout}
	use_quantize: False
	use_lora: False
	vision_projector:
	name: vla.model.paligemma.siglip.PaliGemmaMultiModalProjector
	vision_config:
	hidden_size: 1152
	projection_dim: 2048
	lora:
	r: ${lora_rank}
	dropout: ${lora_dropout}
	use_quantize: False
	use_lora: False
	joint:
	name: vla.model.g0.joint_model.JointModel
	action_expert_adaptive_mode: null
	mixture:
	vlm: # gemma
	hidden_size: 2048
	intermediate_size: 16384
	use_final_norm: False
	cache: True
	use_quantize: False
	use_lora: False
	adaptive_mode: # not applicable for gemma
	proprio:
	hidden_size: 1024
	intermediate_size: 4096
	use_final_norm: True # technically no, but sharing weights with action anyway
	cache: True
	use_quantize: False
	use_lora: False
	adaptive_mode: null
	action:
	hidden_size: 1024
	intermediate_size: 4096
	use_final_norm: True
	cache: False
	use_quantize: False
	use_lora: False
	adaptive_mode: null
	time_hidden_size: 256 # only applicable if using adaptive
	lora:
	r: ${lora_rank}
	dropout: ${lora_dropout}
	num_hidden_layers: 18
	num_attention_heads: 8
	num_key_value_heads: 1
	head_dim: 256
	max_position_embeddings: 8192
	rms_norm_eps: 0.000001
	rope_theta: 10000.0
	attention_bias: False
	attention_dropout: 0.0
	pad_token_id: 0
	#################################################################################################################
	# For evaluation
	#################################################################################################################
	EVALUATION:
	checkpoint: null # Pretrained checkpoint path
	load_in_8bit: False # (For OpenVLA only) Load with 8-bit quantization
	load_in_4bit: False # (For OpenVLA only) Load with 4-bit quantization
	center_crop: True # Center crop? (if trained w/ random crop image aug)
	#################################################################################################################
	# LIBERO environment-specific parameters
	#################################################################################################################
	task_suite_name: "simpler_widowx" # Task suite. Options: libero_spatial, libero_object, libero_goal, libero_10, libero_90
	num_steps_wait: 10 # Number of steps to wait for objects to stabilize in sim
	num_trials_per_task: 24 # Number of rollouts per task
	use_wrist_image: False
	#################################################################################################################
	# Utils
	#################################################################################################################
	run_id_note: None # Extra note to add in run ID for logging
	local_log_dir: "./experiments/logs" # Local directory for eval logs
	use_wandb: False # Whether to also log results in Weights & Biases
	seed: 7 # Random Seed (for reproducibility)