# Image for a GPU-backed Python app (app.py) built on the CUDA 12.4 runtime.
# Layers are ordered least -> most frequently changing: OS packages, PyTorch,
# remaining Python dependencies, then the application source.
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04

# Build-time only: keeps apt/debconf from prompting during `RUN` steps without
# leaking the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Flush Python stdout/stderr immediately so container logs appear in real time.
ENV PYTHONUNBUFFERED=1

# OS-level dependencies, cleaned up in the same layer that created the apt lists.
# build-essential and python3-dev are listed explicitly: they were previously
# pulled in only as "Recommends" of python3-pip, and Triton's JIT (used by
# flash-linear-attention below) compiles its launchers with the system C
# compiler against the Python headers at runtime.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        git \
        python3 \
        python3-dev \
        python3-pip && \
    rm -rf /var/lib/apt/lists/*

# PyTorch built against CUDA 12.4, from the PyTorch wheel index. Kept in its
# own layer so this large download stays cached when other dependencies change.
RUN pip3 install --no-cache-dir \
        torch==2.6.0 --index-url https://download.pytorch.org/whl/cu124

# Remaining Python dependencies.
# TODO(review): these are unpinned, so rebuilds are not reproducible; pin them
# (or install from a requirements/lock file) once known-good versions are chosen.
RUN pip3 install --no-cache-dir \
        accelerate \
        bitsandbytes \
        gradio \
        scipy \
        transformers

# flash-linear-attention: Triton-based SSM kernels (JIT-compiled, no CUDA build needed)
RUN pip3 install --no-cache-dir flash-linear-attention

# Run as an unprivileged user with a fixed UID; every step that needs root is
# above this point.
RUN useradd -m -u 1000 user
USER user

# HOME is set explicitly for the non-root user, and the Hugging Face cache is
# placed under it so it lives in a directory that user owns.
ENV HOME=/home/user \
    HF_HOME=/home/user/.cache/huggingface

WORKDIR /home/user/app

# Application source goes last so edits to app.py do not invalidate the
# dependency layers above.
COPY --chown=user app.py .

# Documentation only (does not publish the port); presumably the port that
# app.py's web server listens on -- confirm against app.py.
EXPOSE 7860

# Exec form: python3 runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["python3", "app.py"]