# Training configuration for the `yam_lift_cube_vision` experiment
# (runner class: OnPolicyRunner, algorithm class: PPO).
#
# NOTE: this file uses `!!python/tuple` tags. These are language-specific
# tags, so a safe YAML loader will reject the document; it must be read with
# a loader that understands Python tags. Only load it from a trusted source.
#
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# copy of this file. Keys that repeat (`activation`, `class_name`) fix most of
# the structure; the exact extent of `cnn_cfg` (through
# `spatial_softmax_temperature`) is presumed -- confirm against the consumer.

# --- Runner settings ---------------------------------------------------------
seed: 42
num_steps_per_env: 24
max_iterations: 3000

# Observation groups listed per role; both roles include the `camera` group.
obs_groups:
  actor: !!python/tuple
    - actor
    - camera
  critic: !!python/tuple
    - critic
    - camera

save_interval: 100
experiment_name: yam_lift_cube_vision
run_name: ''

# --- Logging -----------------------------------------------------------------
logger: wandb
wandb_project: mjlab
wandb_tags: !!python/tuple []

# --- Checkpoint loading ------------------------------------------------------
resume: false
# Quoted so the patterns are unambiguously strings (values are unchanged).
# presumably regular expressions matched against run / checkpoint names
load_run: '.*'
load_checkpoint: 'model_.*.pt'

clip_actions: null
upload_model: true
class_name: OnPolicyRunner

# --- Actor model -------------------------------------------------------------
actor:
  hidden_dims: !!python/tuple
    - 256
    - 256
    - 128
  activation: elu
  obs_normalization: true
  cnn_cfg:
    output_channels:
      - 16
      - 32
    kernel_size:
      - 5
      - 3
    stride:
      - 2
      - 2
    padding: zeros
    activation: elu
    max_pool: false
    # The plain scalar `none` is the string "none", not a YAML null.
    global_pool: none
    spatial_softmax: true
    spatial_softmax_temperature: 1.0
  distribution_cfg:
    class_name: GaussianDistribution
    init_std: 1.0
    std_type: scalar
  rnn_type: null
  rnn_hidden_dim: 256
  rnn_num_layers: 1
  class_name: mjlab.rl.spatial_softmax:SpatialSoftmaxCNNModel

# --- Critic model ------------------------------------------------------------
# Same values as the actor except that `distribution_cfg` is null.
critic:
  hidden_dims: !!python/tuple
    - 256
    - 256
    - 128
  activation: elu
  obs_normalization: true
  cnn_cfg:
    output_channels:
      - 16
      - 32
    kernel_size:
      - 5
      - 3
    stride:
      - 2
      - 2
    padding: zeros
    activation: elu
    max_pool: false
    # The plain scalar `none` is the string "none", not a YAML null.
    global_pool: none
    spatial_softmax: true
    spatial_softmax_temperature: 1.0
  distribution_cfg: null
  rnn_type: null
  rnn_hidden_dim: 256
  rnn_num_layers: 1
  class_name: mjlab.rl.spatial_softmax:SpatialSoftmaxCNNModel

# --- Algorithm ---------------------------------------------------------------
algorithm:
  num_learning_epochs: 5
  num_mini_batches: 4
  learning_rate: 0.001
  schedule: adaptive
  gamma: 0.99
  lam: 0.95
  entropy_coef: 0.005
  desired_kl: 0.01
  max_grad_norm: 1.0
  value_loss_coef: 1.0
  use_clipped_value_loss: true
  clip_param: 0.2
  normalize_advantage_per_mini_batch: false
  optimizer: adam
  share_cnn_encoders: false
  class_name: PPO