#!/usr/bin/env bash
# Run DreamerV3-torch on AntMaze-Medium-Play with exploitation logging.
#
# Usage: bash run_antmaze.sh [logdir]
#
# Arguments:
#   logdir  Directory for training logs (default: ./logdir/antmaze_medium_play).
#           A relative path is interpreted relative to the directory the
#           script is invoked from, not relative to dreamerv3-torch/.
#
# Requires a conda environment named "dreamer" and a dreamerv3-torch/ checkout
# next to this script.

set -euo pipefail

LOGDIR="${1:-./logdir/antmaze_medium_play}"
mkdir -p -- "$LOGDIR"

# Resolve to an absolute path BEFORE changing directory. Otherwise a relative
# logdir is created here but then handed to dreamer.py after the cd below,
# where it would name a different directory under dreamerv3-torch/.
# CDPATH is cleared so cd neither searches other roots nor prints the target.
LOGDIR="$(CDPATH='' cd -- "$LOGDIR" && pwd)"

# dreamer.py is launched from inside the dreamerv3-torch checkout.
cd -- "$(dirname -- "$0")/dreamerv3-torch"

echo "=== Starting DreamerV3 on AntMaze-Medium-Play ==="
echo " logdir: $LOGDIR"
# nvidia-smi may be absent or fail (e.g. CPU-only host); report N/A instead of
# aborting, hence the deliberate 2>/dev/null and || fallback.
echo " GPU: $(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo 'N/A')"
echo ""

# Key TensorBoard metrics to watch for model exploitation:
#   imag_reward_mean — policy's imagined reward (in-model)
#   env_reward_mean  — actual environment reward (from replay buffer)
#   kl               — RSSM posterior/prior divergence
#   reward_loss      — world model reward head loss
#
# Exploitation signal: imag_reward_mean >> env_reward_mean with a growing gap.

# --no-capture-output: conda run buffers stdout/stderr by default, which hides
# all training progress until the process exits.
conda run --no-capture-output -n dreamer python dreamer.py \
  --configs antmaze \
  --task antmaze_medium-play \
  --logdir "$LOGDIR" \
  --seed 0

echo ""
echo "=== Done. Launch TensorBoard with: ==="
echo " conda run -n dreamer tensorboard --logdir $LOGDIR"