Instructions for using rslxcvg/molmoact2-banana-frombase-prod-r128-c010-rw3-w8-20260519 with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- LeRobot
How to use rslxcvg/molmoact2-banana-frombase-prod-r128-c010-rw3-w8-20260519 with LeRobot:
- Notebooks
- Google Colab
- Kaggle
```json
{
  "dataset": {
    "repo_id": "rslxcvg/banana_act_direct_color_simple_v1_molmo_compat",
    "root": "/mnt/vla_picknplace/outputs/lerobot/banana_act_direct_color_simple_v1_molmo_compat",
    "episodes": null,
    "image_transforms": {
      "enable": true,
      "max_num_transforms": 3,
      "random_order": false,
      "tfs": {
        "brightness": {
          "weight": 1.0,
          "type": "ColorJitter",
          "kwargs": {
            "brightness": [
              0.8,
              1.2
            ]
          }
        },
        "contrast": {
          "weight": 1.0,
          "type": "ColorJitter",
          "kwargs": {
            "contrast": [
              0.8,
              1.2
            ]
          }
        },
        "saturation": {
          "weight": 1.0,
          "type": "ColorJitter",
          "kwargs": {
            "saturation": [
              0.5,
              1.5
            ]
          }
        },
        "hue": {
          "weight": 1.0,
          "type": "ColorJitter",
          "kwargs": {
            "hue": [
              -0.05,
              0.05
            ]
          }
        },
        "sharpness": {
          "weight": 1.0,
          "type": "SharpnessJitter",
          "kwargs": {
            "sharpness": [
              0.5,
              1.5
            ]
          }
        },
        "affine": {
          "weight": 1.0,
          "type": "RandomAffine",
          "kwargs": {
            "degrees": [
              -5.0,
              5.0
            ],
            "translate": [
              0.05,
              0.05
            ]
          }
        }
      }
    },
    "revision": null,
    "use_imagenet_stats": true,
    "video_backend": "pyav",
    "return_uint8": false,
    "streaming": false
  },
  "env": null,
  "policy": {
    "type": "molmoact2",
    "n_obs_steps": 1,
    "input_features": {
      "observation.state": {
        "type": "STATE",
        "shape": [
          6
        ]
      },
      "observation.images.front": {
        "type": "VISUAL",
        "shape": [
          3,
          480,
          640
        ]
      }
    },
    "output_features": {
      "action": {
        "type": "ACTION",
        "shape": [
          6
        ]
      }
    },
    "device": "cuda",
    "use_amp": false,
    "use_peft": false,
    "push_to_hub": false,
    "repo_id": null,
    "private": null,
    "tags": null,
    "license": null,
    "pretrained_path": null,
    "checkpoint_path": "allenai/MolmoAct2-SO100_101",
    "checkpoint_revision": null,
    "checkpoint_force_download": false,
    "trust_remote_code": true,
    "chunk_size": 30,
    "n_action_steps": 30,
    "action_mode": "continuous",
    "inference_action_mode": null,
    "discrete_action_tokenizer": "allenai/MolmoAct2-FAST-Tokenizer",
    "discrete_generation_max_steps": null,
    "norm_tag": "so100_so101_molmoact2",
    "setup_type": "single so100/so101 robotic arm in molmoact2",
    "control_mode": "absolute joint pose",
    "image_keys": [
      "observation.images.front"
    ],
    "normalize_language": true,
    "add_setup_tokens": true,
    "add_control_tokens": true,
    "normalize_gripper": true,
    "num_state_tokens": 256,
    "max_sequence_length": null,
    "expected_max_action_dim": 32,
    "num_flow_timesteps": 8,
    "flow_matching_cutoff": 1.0,
    "flow_matching_time_offset": 0.001,
    "flow_matching_time_scale": 0.999,
    "flow_matching_beta_alpha": 1.0,
    "flow_matching_beta_beta": 1.5,
    "num_inference_steps": null,
    "mask_action_dim_padding": true,
    "flow_loss_action_dim_weights": [
      8.0,
      3.0,
      3.0,
      1.0,
      1.0,
      1.0
    ],
    "enable_inference_cuda_graph": true,
    "per_episode_seed": false,
    "eval_seed": null,
    "rtc_config": null,
    "enable_lora_vlm": true,
    "lora_rank": 128,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "lora_bias": "none",
    "enable_lora_action_expert": true,
    "enable_knowledge_insulation": false,
    "freeze_embedding": true,
    "train_action_expert_only": false,
    "gradient_checkpointing": true,
    "model_dtype": "bfloat16",
    "softmax_auxiliary_loss": true,
    "softmax_auxiliary_loss_scale": 0.0001,
    "discrete_loss_token_weighting": "root_subsegments_root_tokens",
    "optimizer_lr": 1e-05,
    "optimizer_vit_lr": 5e-06,
    "optimizer_connector_lr": 5e-06,
    "optimizer_action_expert_lr": 5e-05,
    "optimizer_betas": [
      0.9,
      0.95
    ],
    "optimizer_eps": 1e-06,
    "optimizer_weight_decay": 0.0,
    "optimizer_grad_clip_norm": 1.0,
    "scheduler_warmup_steps": 200,
    "scheduler_decay_steps": null,
    "scheduler_decay_lr": 1e-06,
    "normalization_mapping": {
      "VISUAL": "IDENTITY",
      "STATE": "QUANTILES",
      "ACTION": "QUANTILES"
    },
    "dataset_feature_names": {
      "action": [
        "shoulder_pan.pos",
        "shoulder_lift.pos",
        "elbow_flex.pos",
        "wrist_flex.pos",
        "wrist_roll.pos",
        "gripper.pos"
      ],
      "observation.state": [
        "shoulder_pan.pos",
        "shoulder_lift.pos",
        "elbow_flex.pos",
        "wrist_flex.pos",
        "wrist_roll.pos",
        "gripper.pos"
      ]
    }
  },
  "reward_model": null,
  "output_dir": "/mnt/vla_picknplace/outputs/molmoact2/molmoact2_overnight_frombase_prod_r128_c010_rw3_w8_gpu5_20260519_fullcoverage_v1",
  "job_name": "molmoact2_overnight_frombase_prod_r128_c010_rw3_w8_gpu5_20260519_fullcoverage_v1",
  "resume": false,
  "seed": 1000,
  "cudnn_deterministic": false,
  "num_workers": 4,
  "batch_size": 16,
  "prefetch_factor": 4,
  "persistent_workers": true,
  "steps": 10000,
  "eval_freq": -1,
  "log_freq": 20,
  "tolerance_s": 0.0001,
  "save_checkpoint": true,
  "save_freq": 1000,
  "use_policy_training_preset": true,
  "optimizer": {
    "type": "adamw",
    "lr": 1e-05,
    "weight_decay": 0.0,
    "grad_clip_norm": 1.0,
    "betas": [
      0.9,
      0.95
    ],
    "eps": 1e-06
  },
  "scheduler": {
    "type": "molmoact2_cosine_decay_with_warmup",
    "num_warmup_steps": 200,
    "num_decay_steps": null,
    "peak_lr": 1e-05,
    "decay_lr": 1e-06
  },
  "eval": {
    "n_episodes": 50,
    "batch_size": 50,
    "use_async_envs": true
  },
  "wandb": {
    "enable": true,
    "disable_artifact": true,
    "project": "vla_picknplace",
    "entity": null,
    "notes": null,
    "run_id": "we76vk6a",
    "mode": "online",
    "add_tags": true
  },
  "peft": null,
  "sample_weighting": {
    "type": "manifest",
    "progress_path": null,
    "weights_path": "/mnt/vla_picknplace/outputs/molmoact2/decision_manifests/banana_act_direct_color_simple_v1_molmo_compat_manifest.parquet",
    "weight_column": "loss_weight_normalized",
    "index_column": "index",
    "normalize_batch_mean": false,
    "clip_min": 0.25,
    "clip_max": 4.0,
    "head_mode": "sparse",
    "kappa": 0.01,
    "epsilon": 1e-06,
    "extra_params": {}
  },
  "frame_sampling": {
    "type": "manifest",
    "weights_path": "/mnt/vla_picknplace/outputs/molmoact2/decision_manifests/banana_act_direct_color_simple_v1_molmo_compat_manifest.parquet",
    "weight_column": "sample_weight",
    "index_column": "index",
    "replacement": false,
    "num_samples": null
  },
  "prompt_contrast": {
    "type": "same_layout_reference",
    "manifest_path": "/mnt/vla_picknplace/outputs/molmoact2/decision_manifests/banana_act_direct_color_simple_v1_molmo_compat_manifest.parquet",
    "weight": 0.1,
    "max_anchors_per_batch": 1,
    "min_valid_horizon": 30,
    "phase_min": 0.55,
    "phase_max": 0.72,
    "max_reference_phase_delta": 0.03,
    "max_reference_state_l2": null,
    "max_reference_joint_abs": [
      40.0,
      25.0,
      25.0,
      25.0,
      30.0,
      30.0
    ],
    "require_complete_color_set": true,
    "colors": [
      "red",
      "green",
      "blue"
    ],
    "same_noise_across_colors": true,
    "low_t_min": 0.001,
    "low_t_max": 0.1,
    "rank_loss_weight": 3.0,
    "rank_action_index": 5,
    "rank_action_dim": 0,
    "rank_margin": 0.03,
    "rank_temperature": 0.05,
    "rank_min_target_delta": 0.0001,
    "seed": 1000
  },
  "rename_map": {},
  "checkpoint_path": null
}
```