# Unsloth Training Hub Dockerfile
# CRITICAL: Installation order matters for Unsloth!
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04

# Prevent interactive prompts from apt/dpkg during the build.
# Declared as ARG rather than ENV: the value is visible to every RUN step of
# this stage but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
# System packages: the Python 3.11 interpreter with its venv module and
# headers, pip, and the git / wget / curl / ninja-build tools.
# The apt package lists are deleted in the same layer so they never persist
# in the image.
RUN apt-get update \
    && apt-get install -y \
        curl \
        git \
        ninja-build \
        python3-pip \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Register Python 3.11 as the target of both /usr/bin/python and
# /usr/bin/python3 (priority 1 in each alternatives group).
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
# Create an isolated virtual environment and put it first on PATH, so every
# later `pip` / `python3` invocation (including CMD) resolves to the venv.
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Upgrade pip and install build dependencies.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip wheel setuptools psutil packaging
# ============================================================================
# CRITICAL: UNSLOTH INSTALLATION ORDER
# 1. Install PyTorch first
# 2. Install flash-attn separately with --no-build-isolation
# 3. Install unsloth
# 4. Install vllm
# 5. Install diffusers (for GRPO)
# 6. Install TRL from git (latest)
# 7. Install the remaining application dependencies
# ============================================================================
# Step 1: PyTorch with CUDA 12.1 (wheels come from the cu121 index).
# torchvision 0.19.0 and torchaudio 2.4.0 are the releases that pair with
# torch 2.4.0; pinning all three keeps this step reproducible.
# --no-cache-dir keeps the downloaded wheels out of the image layer.
RUN pip install --no-cache-dir \
      torch==2.4.0 \
      torchvision==0.19.0 \
      torchaudio==2.4.0 \
      --index-url https://download.pytorch.org/whl/cu121
# Step 2: flash-attn, installed on its own after PyTorch.
# --no-build-isolation runs the build inside the current environment, so it
# can see the torch installed in Step 1.
# --no-cache-dir keeps pip's cache out of the image layer.
RUN pip install --no-cache-dir --no-build-isolation flash-attn
# Step 3: Unsloth (main package), installed from the upstream git repository
# once flash-attn is already present.
# --no-cache-dir keeps pip's cache out of the image layer.
RUN pip install --no-cache-dir "unsloth @ git+https://github.com/unslothai/unsloth.git"
# Step 4: vLLM for fast inference during RL.
# NOTE(review): vllm is unpinned and declares its own torch requirement; the
# resolver may replace the torch==2.4.0 from Step 1 (which would also
# invalidate the flash-attn build from Step 2). Confirm the resolved versions
# or pin vllm to a release that matches the pinned torch.
# --no-cache-dir keeps pip's cache out of the image layer.
RUN pip install --no-cache-dir vllm
# Step 5: diffusers (required for GRPO).
# --no-cache-dir keeps pip's cache out of the image layer.
RUN pip install --no-cache-dir diffusers
# Step 6: TRL from git (latest, for GRPO/GSPO support).
# Installed after Unsloth, so this git build is the TRL that ends up in the venv.
# --no-cache-dir keeps pip's cache out of the image layer.
RUN pip install --no-cache-dir "git+https://github.com/huggingface/trl.git"
# Step 7: Additional dependencies.
# Every requirement with a version specifier MUST be quoted: RUN executes via
# /bin/sh, and an unquoted `pkg>=1.0` is parsed as `pkg` plus a redirection of
# stdout into a file named `=1.0`, silently dropping the version constraint.
# hf_transfer is installed explicitly because this image sets
# HF_HUB_ENABLE_HF_TRANSFER=1, and huggingface_hub needs the hf_transfer
# package to honour that setting.
# --no-cache-dir keeps pip's cache out of the image layer.
RUN pip install --no-cache-dir \
      "accelerate>=0.30.0" \
      "anthropic>=0.39.0" \
      "datasets>=2.18.0" \
      "gradio>=4.0.0" \
      hf_transfer \
      "huggingface_hub>=0.24.0" \
      protobuf \
      pyyaml \
      sentencepiece
# Application root: the relative paths below and the CMD all resolve from /app.
WORKDIR /app

# Bring the build context into the application root.
COPY . .

# Pre-create the directories used for runs, data and outputs.
RUN mkdir -p runs data outputs
# Runtime environment:
#   PYTHONUNBUFFERED=1           - Python writes stdout/stderr unbuffered, so
#                                  logs show up immediately in `docker logs`.
#   HF_HUB_ENABLE_HF_TRANSFER=1  - asks huggingface_hub to use the hf_transfer
#                                  backend for Hub transfers.
#   UNSLOTH_VLLM_STANDBY=1       - read by Unsloth; presumably toggles its vLLM
#                                  standby mode (verify against Unsloth docs).
ENV PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    UNSLOTH_VLLM_STANDBY=1
# Expose Gradio port. EXPOSE is documentation only; publish it with `-p 7860:7860`.
EXPOSE 7860

# Gradio binds to 127.0.0.1 unless told otherwise, which cannot be reached
# through a published container port. GRADIO_SERVER_NAME supplies the default
# bind address; an explicit server_name passed to launch() in app.py still
# takes precedence.
ENV GRADIO_SERVER_NAME=0.0.0.0

# NOTE(review): there is no USER instruction, so the app runs as root.
# Consider a dedicated non-root user once ownership of the mounted
# runs/data/outputs directories has been decided.

# Run app (exec form; python3 resolves to the venv interpreter through PATH).
CMD ["python3", "app.py"]