# Image that installs the training dependencies and runs the V2 training
# script (train_grpo_hf.py) as its default command.
FROM python:3.11-slim

# System deps. git is needed by the `git+https://` pip install of Unsloth below.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip setuptools wheel

WORKDIR /app

# PyTorch, resolved against the cu124 wheel index.
# The requirement specifier MUST be quoted: unquoted, the shell treats
# `>=2.4.0` as an output redirection to a file named "=2.4.0" and pip only
# receives `torch`, silently dropping the minimum-version constraint.
RUN pip install --no-cache-dir "torch>=2.4.0" \
        --index-url https://download.pytorch.org/whl/cu124

# Dependencies. The manifest is copied on its own, before the training script,
# so editing the script does not invalidate this layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Unsloth, installed straight from the upstream git repository.
RUN pip install --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# V2 training script
COPY train_grpo_hf.py .

# V2 training command (exec form, so python3 runs without a wrapping shell).
CMD ["python3", "train_grpo_hf.py", \
     "--model", "unsloth/Qwen3.5-27B", \
     "--dataset", "balarajr/triage-grpo", \
     "--hub-model", "BharathPESU/triage-agent-27b-v2", \
     "--push", \
     "--epochs", "3", \
     "--batch-size", "1", \
     "--grad-accum", "1", \
     "--num-gen", "2", \
     "--lr", "5e-6", \
     "--augment-hf", \
     "--augment-max", "300"]