---
# Training-run configuration (runner class_name: OnPolicyRunner).
#
# NOTE(review): the block nesting below was reconstructed from a flattened
# one-line dump using key order and the repeated `class_name` keys; confirm
# it against the tool that produced this file.
#
# NOTE(review): the `!!python/tuple` tags are Python-specific. Loaders that
# only accept standard YAML tags (for example PyYAML's `safe_load`) reject
# them. They are kept so the loaded values remain tuples rather than lists.

seed: 42
num_steps_per_env: 24
max_iterations: 10000

# One entry per model; each value is a tuple of group names.
obs_groups:
  actor: !!python/tuple
    - actor
  critic: !!python/tuple
    - critic

# Checkpointing, naming and logging.
save_interval: 50
experiment_name: go1_velocity
run_name: ''
logger: wandb
wandb_project: mjlab
wandb_tags: !!python/tuple []

# Resume / checkpoint selection.
# presumably pattern strings matched against run and checkpoint names — verify
# against the consumer. Quoted so they are unambiguously strings.
resume: false
load_run: '.*'
load_checkpoint: 'model_.*.pt'

clip_actions: null
upload_model: true
class_name: OnPolicyRunner

# Actor model settings (class_name: MLPModel).
actor:
  hidden_dims: !!python/tuple
    - 512
    - 256
    - 128
  activation: elu
  obs_normalization: false
  cnn_cfg: null
  distribution_cfg:
    class_name: GaussianDistribution
    init_std: 1.0
    std_type: scalar
  rnn_type: null
  rnn_hidden_dim: 256
  rnn_num_layers: 1
  class_name: MLPModel

# Critic model settings (class_name: MLPModel); same layout as `actor`, but
# with no distribution configured.
critic:
  hidden_dims: !!python/tuple
    - 512
    - 256
    - 128
  activation: elu
  obs_normalization: false
  cnn_cfg: null
  distribution_cfg: null
  rnn_type: null
  rnn_hidden_dim: 256
  rnn_num_layers: 1
  class_name: MLPModel

# Algorithm settings (class_name: PPO).
algorithm:
  num_learning_epochs: 5
  num_mini_batches: 4
  learning_rate: 0.001
  schedule: adaptive
  gamma: 0.99
  lam: 0.95
  entropy_coef: 0.01
  desired_kl: 0.01
  max_grad_norm: 1.0
  value_loss_coef: 1.0
  use_clipped_value_loss: true
  clip_param: 0.2
  normalize_advantage_per_mini_batch: false
  optimizer: adam
  share_cnn_encoders: false
  class_name: PPO