# syntax=docker/dockerfile:1

# vLLM image built on the NVIDIA CUDA 13.1 development base.
#
# Stages:
#   base    - OS packages, Python toolchain and the vLLM package.
#   runtime - environment configuration and the entrypoint script.
#
# Build arguments:
#   VLLM_VERSION - optional exact vLLM version to install (e.g. 0.6.3).
#                  Empty (the default) installs whatever pip resolves at
#                  build time; set it for reproducible builds.

FROM nvidia/cuda:13.1.0-devel-ubuntu26.04 AS base

ARG VLLM_VERSION=

# update + install + list cleanup stay in one layer so the apt index is never
# cached stale and never shipped in the image. DEBIAN_FRONTEND is set inline
# only, so it does not leak into the runtime environment.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        g++ \
        gcc \
        jq \
        python3 \
        python3-dev \
        python3-pip \
        python3-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Provide an unversioned `python` command.
RUN ln -sf /usr/bin/python3 /usr/bin/python

# The distro Python is normally marked "externally managed" (PEP 668), which
# makes a plain system-wide `pip install` abort. --break-system-packages keeps
# the original system-wide install location while allowing the install.
# "${VLLM_VERSION:+==...}" expands to nothing when VLLM_VERSION is empty.
RUN pip3 install --no-cache-dir --break-system-packages \
        "vllm${VLLM_VERSION:+==${VLLM_VERSION}}"

# Fail the build early if the installed package cannot be imported.
RUN python3 -c "import vllm; print('vLLM', vllm.__version__)"

FROM base AS runtime

ENV CC=/usr/bin/gcc \
    CUDA_HOME=/usr/local/cuda-13.1 \
    TORCH_MATMUL_PRECISION=high \
    VLLM_USE_FLASHINFER_SAMPLER=1

# Prepend the CUDA tools instead of replacing PATH, so directories already
# configured by the base image remain reachable. This must be a separate ENV
# instruction: CUDA_HOME is not visible within the instruction that sets it.
ENV PATH=${CUDA_HOME}/bin:${PATH}

# Documentation only; the port still has to be published at `docker run`.
EXPOSE 8000

# Set the executable bit at copy time rather than in an extra chmod layer.
COPY --chmod=755 entrypoint.sh /entrypoint.sh

# NOTE(review): no USER is set, so the container runs as root. Switching to an
# unprivileged user depends on what entrypoint.sh writes and on any cache
# directories mounted by operators - confirm before changing.
ENTRYPOINT ["/entrypoint.sh"]
CMD []