| seed: 42 | |
| num_steps_per_env: 24 | |
| max_iterations: 10000 | |
| obs_groups: | |
| actor: !!python/tuple | |
| - actor | |
| critic: !!python/tuple | |
| - critic | |
| save_interval: 50 | |
| experiment_name: go1_velocity | |
| run_name: '' | |
| logger: wandb | |
| wandb_project: mjlab | |
| wandb_tags: !!python/tuple [] | |
| resume: false | |
| load_run: .* | |
| load_checkpoint: model_.*.pt | |
| clip_actions: null | |
| upload_model: true | |
| class_name: OnPolicyRunner | |
| actor: | |
| hidden_dims: !!python/tuple | |
| - 512 | |
| - 256 | |
| - 128 | |
| activation: elu | |
| obs_normalization: false | |
| cnn_cfg: null | |
| distribution_cfg: | |
| class_name: GaussianDistribution | |
| init_std: 1.0 | |
| std_type: scalar | |
| rnn_type: null | |
| rnn_hidden_dim: 256 | |
| rnn_num_layers: 1 | |
| class_name: MLPModel | |
| critic: | |
| hidden_dims: !!python/tuple | |
| - 512 | |
| - 256 | |
| - 128 | |
| activation: elu | |
| obs_normalization: false | |
| cnn_cfg: null | |
| distribution_cfg: null | |
| rnn_type: null | |
| rnn_hidden_dim: 256 | |
| rnn_num_layers: 1 | |
| class_name: MLPModel | |
| algorithm: | |
| num_learning_epochs: 5 | |
| num_mini_batches: 4 | |
| learning_rate: 0.001 | |
| schedule: adaptive | |
| gamma: 0.99 | |
| lam: 0.95 | |
| entropy_coef: 0.01 | |
| desired_kl: 0.01 | |
| max_grad_norm: 1.0 | |
| value_loss_coef: 1.0 | |
| use_clipped_value_loss: true | |
| clip_param: 0.2 | |
| normalize_advantage_per_mini_batch: false | |
| optimizer: adam | |
| share_cnn_encoders: false | |
| class_name: PPO | |