---
# Runner configuration for the `go2_velocity` experiment
# (runner class: `OnPolicyRunner`).
#
# NOTE(review): this file previously had every key on a single line, which is
# not parseable as a YAML block mapping. The nesting below was reconstructed
# from key order and from the empty-valued parent keys (`obs_groups`,
# `policy`, `algorithm`) -- confirm against whatever produced this dump.
#
# The `!!python/tuple` tags are PyYAML-specific: safe loaders
# (e.g. `yaml.safe_load`) reject them. They are kept so the loaded values
# remain tuples rather than lists.

seed: 42
num_steps_per_env: 24
max_iterations: 10001

# Maps each consumer (`policy`, `critic`) to a tuple of observation group names.
obs_groups:
  policy: !!python/tuple
    - policy
  critic: !!python/tuple
    - critic

save_interval: 100
experiment_name: go2_velocity
# Explicit empty string (not null).
run_name: ''

logger: tensorboard
wandb_project: mjlab
# Explicit empty tuple.
wandb_tags: !!python/tuple []

resume: false
# Quoted so the values are unambiguously strings; they look like regex
# patterns -- presumably matched against run / checkpoint names, TODO confirm.
load_run: '.*'
load_checkpoint: 'model_.*.pt'

clip_actions: null
class_name: OnPolicyRunner

policy:
  init_noise_std: 1.0
  noise_std_type: scalar
  actor_obs_normalization: true
  critic_obs_normalization: true
  actor_hidden_dims: !!python/tuple
    - 512
    - 256
    - 128
  critic_hidden_dims: !!python/tuple
    - 512
    - 256
    - 128
  activation: elu

algorithm:
  num_learning_epochs: 5
  num_mini_batches: 4
  learning_rate: 0.001
  schedule: adaptive
  gamma: 0.99
  lam: 0.95
  entropy_coef: 0.01
  desired_kl: 0.01
  max_grad_norm: 1.0
  value_loss_coef: 1.0
  use_clipped_value_loss: true
  clip_param: 0.2
  normalize_advantage_per_mini_batch: false