File size: 3,869 Bytes

6d851a7

{
    "type": "molmoact2",
    "n_obs_steps": 1,
    "input_features": {
        "observation.state": {
            "type": "STATE",
            "shape": [
                6
            ]
        },
        "observation.images.top": {
            "type": "VISUAL",
            "shape": [
                3,
                720,
                1280
            ]
        },
        "observation.images.side": {
            "type": "VISUAL",
            "shape": [
                3,
                720,
                1280
            ]
        },
        "observation.images.wrist": {
            "type": "VISUAL",
            "shape": [
                3,
                720,
                1280
            ]
        }
    },
    "output_features": {
        "action": {
            "type": "ACTION",
            "shape": [
                6
            ]
        }
    },
    "device": "cuda",
    "use_amp": false,
    "use_peft": false,
    "push_to_hub": true,
    "repo_id": "autmoate/molmoact2_put_mug_large_both_bs16_20k",
    "private": null,
    "tags": null,
    "license": null,
    "pretrained_path": null,
    "checkpoint_path": "allenai/MolmoAct2-SO100_101",
    "checkpoint_revision": null,
    "checkpoint_force_download": false,
    "chunk_size": 10,
    "n_action_steps": 10,
    "action_mode": "both",
    "inference_action_mode": null,
    "discrete_action_tokenizer": "allenai/MolmoAct2-FAST-Tokenizer",
    "discrete_generation_max_steps": null,
    "norm_tag": null,
    "setup_type": "single SO-100 follower arm with top, wrist, and side RGB cameras",
    "control_mode": "absolute joint pose",
    "image_keys": [
        "observation.images.top",
        "observation.images.wrist",
        "observation.images.side"
    ],
    "normalize_language": true,
    "add_setup_tokens": true,
    "add_control_tokens": true,
    "normalize_gripper": true,
    "num_state_tokens": 256,
    "max_sequence_length": null,
    "expected_max_action_dim": 32,
    "num_flow_timesteps": 8,
    "flow_matching_cutoff": 1.0,
    "flow_matching_time_offset": 0.001,
    "flow_matching_time_scale": 0.999,
    "flow_matching_beta_alpha": 1.0,
    "flow_matching_beta_beta": 1.5,
    "num_inference_steps": null,
    "mask_action_dim_padding": true,
    "enable_inference_cuda_graph": true,
    "per_episode_seed": false,
    "eval_seed": null,
    "rtc_config": null,
    "enable_lora_vlm": true,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "lora_bias": "none",
    "enable_lora_action_expert": false,
    "enable_knowledge_insulation": false,
    "freeze_embedding": true,
    "train_action_expert_only": false,
    "gradient_checkpointing": true,
    "model_dtype": "bfloat16",
    "softmax_auxiliary_loss": true,
    "softmax_auxiliary_loss_scale": 0.0001,
    "discrete_loss_token_weighting": "root_subsegments_root_tokens",
    "optimizer_lr": 1e-05,
    "optimizer_vit_lr": 5e-06,
    "optimizer_connector_lr": 5e-06,
    "optimizer_action_expert_lr": 5e-05,
    "optimizer_betas": [
        0.9,
        0.95
    ],
    "optimizer_eps": 1e-06,
    "optimizer_weight_decay": 0.0,
    "optimizer_grad_clip_norm": 1.0,
    "scheduler_warmup_steps": 200,
    "scheduler_decay_steps": null,
    "scheduler_decay_lr": 1e-06,
    "normalization_mapping": {
        "VISUAL": "IDENTITY",
        "STATE": "QUANTILES",
        "ACTION": "QUANTILES"
    },
    "dataset_feature_names": {
        "action": [
            "shoulder_pan.pos",
            "shoulder_lift.pos",
            "elbow_flex.pos",
            "wrist_flex.pos",
            "wrist_roll.pos",
            "gripper.pos"
        ],
        "observation.state": [
            "shoulder_pan.pos",
            "shoulder_lift.pos",
            "elbow_flex.pos",
            "wrist_flex.pos",
            "wrist_roll.pos",
            "gripper.pos"
        ]
    }
}