Spaces:
Running on L40S
Running on L40S
| # syntax=docker/dockerfile:1.6 | |
| # CUDA 12.8 runtime — gemlite/Triton kernels JIT against the runtime ptxas | |
| # that comes with this image; no need for the larger -devel variant. | |
| # The tag pins the exact CUDA patch release and the Ubuntu release (no digest pin). | |
| FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04 | |
| # ── system deps ────────────────────────────────────────────────────────────── | |
| # build-essential is needed because some sdists (gemlite among them) compile C | |
| # extensions at install time. python3 is the host interpreter that bootstraps | |
| # uv; uv then provisions its own pinned interpreter for the venv. nginx fronts | |
| # everything on :7860 (frontend, backend API, dashboard). procps gives us pkill, | |
| # which entrypoint.sh uses to reap orphaned workers from a prior crashed boot. | |
| RUN apt-get update \ | |
| && apt-get install -y --no-install-recommends \ | |
| build-essential \ | |
| ca-certificates \ | |
| curl \ | |
| git \ | |
| libgomp1 \ | |
| libssl3 \ | |
| nginx \ | |
| openssl \ | |
| procps \ | |
| python3 \ | |
| python3-venv \ | |
| # Drop the apt package index in the same layer that fetched it. | |
| && rm -rf /var/lib/apt/lists/* \ | |
| # Give uid/gid 1000 ownership of nginx's state and log directories and of /run. | |
| && chown -R 1000:1000 /var/lib/nginx /var/log/nginx /run | |
| # Run everything from here on as an unprivileged account. HF Spaces expects | |
| # uid 1000, so create `user` with that uid and a home directory. | |
| RUN useradd --create-home --uid 1000 user | |
| USER user | |
| # Put the user's ~/.local/bin at the front of PATH. | |
| ENV HOME=/home/user \ | |
| PATH="/home/user/.local/bin:$PATH" | |
| # uv (Python venv + package manager). The demo's setup.sh assumes it's on PATH. | |
| # Download the installer to a file first instead of piping curl into sh: the | |
| # default RUN shell has no pipefail, so with `curl | sh` a failed download | |
| # hands sh an empty script, the step exits 0, and the build only breaks | |
| # later when uv turns out to be missing. With `&&`, a curl error stops here. | |
| RUN curl -fsSL https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \ | |
| && sh /tmp/uv-install.sh \ | |
| && rm -f /tmp/uv-install.sh | |
| WORKDIR /home/user/app | |
| # ── clone + run setup.sh in one RUN so GH_TOKEN never lands in a layer ─────── | |
| # GH_TOKEN is supplied by `--mount=type=secret`; the secret file is only | |
| # visible during this single RUN and is not stored in the image. | |
| # SKIP_DOWNLOAD=1 keeps setup.sh from pulling the 3.5 GB model at build time | |
| # — entrypoint.sh handles that at boot so a Space restart doesn't have to | |
| # rebuild the image. | |
| # Mount options: uid=1000 makes the secret file owned by the non-root user so | |
| # `cat` can read it; required=true fails the build if GH_TOKEN is not provided. | |
| # The credential helper is an inline shell function that prints a fixed | |
| # username and reads the password from /run/secrets/GH_TOKEN each time git | |
| # invokes it, so the token value itself is never written into the git config. | |
| # The helper is set with --global, so it is active for the clone and for | |
| # setup.sh (presumably so git calls made by setup.sh authenticate too — TODO | |
| # confirm), and it is unset at the end of this same RUN. | |
| RUN --mount=type=secret,id=GH_TOKEN,uid=1000,required=true \ | |
| git config --global credential.helper '!f() { echo "username=oauth2"; echo "password=$(cat /run/secrets/GH_TOKEN)"; }; f' \ | |
| && git clone https://github.com/PrismML-Eng/Bonsai-image-demo.git . \ | |
| && SKIP_DOWNLOAD=1 BONSAI_PACKAGE_MIN_AGE_DAYS=0 ./setup.sh \ | |
| && git config --global --unset credential.helper | |
| # ── pre-build the Next.js frontend ─────────────────────────────────────────── | |
| # Baking the build into the image so the first browser visit doesn't pay | |
| # `npm install + next build` (~2 min) on top of model load. NEXT_PUBLIC_* | |
| # vars are inlined at build time and don't change at runtime, so the | |
| # backend URL (always loopback inside this container) is baked too. | |
| # | |
| # NEXT_PUBLIC_BACKEND_URL points at NGINX (:7860), NOT a single backend | |
| # replica (:8000). The studio's /api/generate route proxies to | |
| # `${NEXT_PUBLIC_BACKEND_URL}/generate`; sending that to :8000 pins ALL UI | |
| # traffic to replica 0 and bypasses nginx's least_conn upstream entirely | |
| # (only direct /generate + /backends calls were getting balanced). Routing | |
| # through :7860 hits the `location ~ ^/(generate|...)` block, which | |
| # load-balances across every replica (8000..800N). The extra loopback hop | |
| # (next → nginx → backend) is ~1ms. | |
| # | |
| # Demo-only patch: strip the "Batch of 4" mode from the studio's mode picker. | |
| # On a shared GPU under launch load, one batch click ties up a generation | |
| # slot for 4× as long. We do this with a `sed` on the cloned source (NOT in | |
| # the image-studio repo) so the upstream stays untouched. The `! grep` guard | |
| # fails the build loudly if upstream renames the option and the sed misses — | |
| # otherwise batch would silently come back on the next rebuild. | |
| RUN cd vendor/image-studio/frontend \ | |
| # Delete the batch option line from the mode picker source. | |
| && sed -i '/{ value: "batch", label: "Batch of 4" },/d' components/studio-client.tsx \ | |
| # Guard: abort the build if any "Batch of 4" text survived the sed. | |
| && if grep -q 'Batch of 4' components/studio-client.tsx; then exit 1; fi \ | |
| && echo "[patch] removed Batch-of-4 mode from studio UI" \ | |
| # Export once for both npm steps: the venv's bin directory goes first on | |
| # PATH, and the backend URL is visible to the build. | |
| && export PATH="$HOME/app/.venv/bin:$PATH" NEXT_PUBLIC_BACKEND_URL=http://127.0.0.1:7860 \ | |
| && npm install --no-audit --no-fund \ | |
| && npm run build | |
| # ── Space-local files ──────────────────────────────────────────────────────── | |
| # All Space-specific code lives under space/ (Python package + sidecar + | |
| # dashboard + nginx config + entrypoint). The demo's own code stays at the | |
| # repo root (cloned earlier) so the two namespaces don't collide. | |
| # Destination paths are relative to WORKDIR (/home/user/app). | |
| COPY --chown=user space/ space/ | |
| # Make sure the entrypoint script has its execute bit set. | |
| RUN chmod +x space/entrypoint.sh | |
| # The container serves on port 7860. | |
| EXPOSE 7860 | |
| # Exec form: the entrypoint script is started directly, without a wrapping shell. | |
| CMD ["/home/user/app/space/entrypoint.sh"] | |