# syntax=docker/dockerfile:1

# Multi-stage build for the headroom proxy.
#
# Stages:
#   builder           - installs build tools, uv, the project's dependencies and
#                       the headroom package into the system site-packages.
#   runtime-slim-base - python-slim runtime that receives the builder's
#                       site-packages plus a small `headroom` launcher script.
#   runtime-slim      - distroless runtime that receives the same site-packages.
#   runtime           - default target; an alias of runtime-slim-base.
#
# Global ARGs below are only visible to FROM lines unless a stage redeclares
# them with a bare `ARG NAME`.

ARG PYTHON_VERSION=3.11
ARG UV_VERSION=0.6.17
# Pinned 2026-04-15. Update via Dependabot or: docker pull python:3.11-slim
ARG PYTHON_DIGEST=sha256:233de06753d30d120b1a3ce359d8d3be8bda78524cd8f520c99883bfe33964cf
ARG BASE_IMAGE=python:${PYTHON_VERSION}-slim@${PYTHON_DIGEST}
# Pinned 2026-04-15. Update via Dependabot or: docker pull gcr.io/distroless/python3-debian13
ARG DISTROLESS_DIGEST=sha256:ed3a4beb46f8f8baac068743ba1b1f95ea3f793422129cf6dd23967f779b6018
ARG DISTROLESS_IMAGE=gcr.io/distroless/python3-debian13
ARG PYTHON_SITE_PACKAGES=/usr/local/lib/python${PYTHON_VERSION}/site-packages

# ---- Build stage: compile native extensions, build wheel ----
FROM python:${PYTHON_VERSION}-slim@${PYTHON_DIGEST} AS builder

ARG UV_VERSION

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        g++ \
    && rm -rf /var/lib/apt/lists/*

RUN python -m pip install --no-cache-dir uv==${UV_VERSION}

WORKDIR /build

# Layer 1: install deps only (cached unless pyproject.toml/uv.lock change)
COPY pyproject.toml uv.lock README.md ./
# Stub package so uv can resolve the local extras without full source
RUN mkdir -p headroom && touch headroom/__init__.py

ARG HEADROOM_EXTRAS=proxy,code,ml

# Install base extras, then swap onnxruntime for GPU variant if requested.
# When gpu extra is set, torch (from [ml]) pulls nvidia-*-cu13 packages.
# onnxruntime-gpu needs CUDA 12 libs (.so.12), so we force-install the cu12
# variants alongside cu13 — they coexist in separate site-packages dirs.
# The GPU branch runs whenever HEADROOM_EXTRAS contains the substring "gpu".
RUN --mount=type=cache,target=/root/.cache/uv \
    UV_HTTP_TIMEOUT=300 uv pip install --system ".[${HEADROOM_EXTRAS}]" && \
    if echo "${HEADROOM_EXTRAS}" | grep -q gpu; then \
        uv pip uninstall --system onnxruntime && \
        UV_HTTP_TIMEOUT=300 uv pip install --system --force-reinstall \
            "onnxruntime-gpu>=1.16.0" \
            "nvidia-cublas-cu12>=12.0" \
            "nvidia-cuda-runtime-cu12>=12.0" \
            "nvidia-cudnn-cu12>=9.0" \
            "nvidia-cufft-cu12>=11.0" \
            "nvidia-cuda-nvrtc-cu12>=12.0"; \
    fi

# Layer 2: copy real source, reinstall only headroom-ai (no deps)
COPY headroom/ headroom/
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --no-deps --reinstall-package headroom-ai .

# ---- Runtime stage: python-slim with CUDA 12 + 13 pip libs ----
FROM ${BASE_IMAGE} AS runtime-slim-base

# RUNTIME_USER selects the account the container runs as; the user-setup step
# below only creates an account when it is "nonroot".
ARG RUNTIME_USER=nonroot
ARG PYTHON_VERSION
ARG PYTHON_SITE_PACKAGES

# curl is required by the HEALTHCHECK below.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl libgomp1 && \
    rm -rf /var/lib/apt/lists/*

# Copy site-packages from builder (includes nvidia-*-cu12 + cu13 pip libs).
COPY --from=builder ${PYTHON_SITE_PACKAGES} ${PYTHON_SITE_PACKAGES}

# Create headroom CLI entrypoint — exec python3 from this image's PATH.
# Only site-packages is copied from the builder, so the launcher is written here.
RUN echo '#!/bin/sh' > /usr/local/bin/headroom && \
    echo 'exec python3 -m headroom.cli "$@"' >> /usr/local/bin/headroom && \
    chmod +x /usr/local/bin/headroom

# Prepare /data and the home directory. For RUNTIME_USER=nonroot a uid/gid 1000
# account is created and given ownership; for any other value only
# /root/.headroom is created.
RUN mkdir -p /home/nonroot /data && \
    if [ "$RUNTIME_USER" = "nonroot" ]; then \
        groupadd --gid 1000 nonroot && \
        useradd --uid 1000 --gid nonroot --create-home nonroot && \
        mkdir -p /home/nonroot/.headroom && \
        chown -R nonroot:nonroot /data /home/nonroot; \
    else \
        mkdir -p /root/.headroom; \
    fi

USER ${RUNTIME_USER}
WORKDIR /home/nonroot

# LD_LIBRARY_PATH points the dynamic loader at the pip-installed NVIDIA
# library directories inside site-packages.
ENV HEADROOM_HOST=0.0.0.0 \
    HEADROOM_PORT=7860 \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    LD_LIBRARY_PATH=${PYTHON_SITE_PACKAGES}/nvidia/cublas/lib:${PYTHON_SITE_PACKAGES}/nvidia/cuda_runtime/lib:${PYTHON_SITE_PACKAGES}/nvidia/cudnn/lib:${PYTHON_SITE_PACKAGES}/nvidia/cufft/lib:${PYTHON_SITE_PACKAGES}/nvidia/curand/lib:${PYTHON_SITE_PACKAGES}/nvidia/cuda_nvrtc/lib

EXPOSE 7860

# Shell form so ${HEADROOM_PORT:-7860} is expanded inside the container at probe
# time. `|| exit 1` maps curl's own exit codes onto the documented "unhealthy"
# status (1).
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
    CMD curl --fail --silent "http://127.0.0.1:${HEADROOM_PORT:-7860}/readyz" || exit 1

ENTRYPOINT ["headroom", "proxy"]

# ---- Runtime stage: distroless with the builder's site-packages ----
# NOTE(review): site-packages is built under Python ${PYTHON_VERSION} in the
# builder; confirm the distroless image ships the same Python minor version,
# otherwise compiled extension modules will not import.
FROM ${DISTROLESS_IMAGE}@${DISTROLESS_DIGEST} AS runtime-slim

ARG RUNTIME_USER=nonroot
ARG PYTHON_SITE_PACKAGES

COPY --from=builder ${PYTHON_SITE_PACKAGES} ${PYTHON_SITE_PACKAGES}

USER ${RUNTIME_USER}
WORKDIR /app

# PYTHONPATH adds the copied site-packages directory to the interpreter's
# module search path.
ENV HEADROOM_HOST=0.0.0.0 \
    HEADROOM_PORT=7860 \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH=${PYTHON_SITE_PACKAGES} \
    LD_LIBRARY_PATH=${PYTHON_SITE_PACKAGES}/nvidia/cublas/lib:${PYTHON_SITE_PACKAGES}/nvidia/cuda_runtime/lib:${PYTHON_SITE_PACKAGES}/nvidia/cudnn/lib:${PYTHON_SITE_PACKAGES}/nvidia/cufft/lib:${PYTHON_SITE_PACKAGES}/nvidia/curand/lib:${PYTHON_SITE_PACKAGES}/nvidia/cuda_nvrtc/lib

EXPOSE 7860

# Exec-form probe using Python's urllib instead of curl; the port is read from
# HEADROOM_PORT at probe time.
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
    CMD ["python3", "-c", "import os, urllib.request; port = os.environ.get('HEADROOM_PORT', '7860'); urllib.request.urlopen(f'http://127.0.0.1:{port}/readyz', timeout=5)"]

ENTRYPOINT ["python3", "-m", "headroom.cli", "proxy"]

# Default published image remains python-slim runtime
FROM runtime-slim-base AS runtime