Spaces:

Doom01
/

GemmaE4B

Paused

App Files Files Community

GemmaE4B / Dockerfile

Doom01

Update Dockerfile

7314747 verified 25 days ago

Raw

History Blame Contribute Delete

3.38 kB

	# Base image: slim Python 3.11; the apt-get step below relies on it being Debian-based.
	FROM python:3.11-slim

	# ── System dependencies ───────────────────────────────────────────────────────
	# Build toolchain (build-essential, cmake), OpenBLAS/OpenMP libraries,
	# Node.js + npm for the gateway, supervisor, and download/VCS tools.
	# --no-install-recommends skips optional "Recommends" packages so the image
	# stays small. ca-certificates is listed explicitly because curl, wget and git
	# only *recommend* it, and HTTPS downloads fail without it.
	# Packages are kept one per line in alphabetical order for clean diffs.
	# The apt lists are deleted in this same layer so they never persist in the image.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	    build-essential \
	    ca-certificates \
	    cmake \
	    curl \
	    git \
	    libgomp1 \
	    libopenblas-dev \
	    nodejs \
	    npm \
	    supervisor \
	    wget \
	    && rm -rf /var/lib/apt/lists/*

	# ── llama-cpp-python ──────────────────────────────────────────────────────────
	# Installs llama-cpp-python with its [server] extra. The prebuilt CPU wheel
	# index is searched in addition to PyPI.
	# IMPORTANT: Do NOT set CMAKE_ARGS here — that forces a full C++ source compile
	# which takes 15–30 min and times out on HF Spaces.
	# --prefer-binary makes pip pick the newest prebuilt wheel even when PyPI
	# already carries a newer source-only release; without it, an unpinned install
	# can silently fall back to the same slow source compile.
	# Per the original author's note: the prebuilt CPU wheels are already
	# AVX2/AVX512-optimized, and real-world speedup comes from using a smaller
	# model (see start.sh), not BLAS.
	RUN pip install --no-cache-dir --prefer-binary \
	    "llama-cpp-python[server]" \
	    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu

	# ── Server sub-dependencies (explicit safety net) ─────────────────────────────
	# Names the web-server stack explicitly in its own layer instead of relying
	# solely on transitive installs (presumably these overlap with what the
	# llama-cpp-python [server] extra pulls in — the original calls this a safety net).
	# --no-cache-dir keeps pip's download cache out of the layer.
	RUN pip install --no-cache-dir \
	    sse-starlette starlette fastapi "uvicorn[standard]" \
	    pydantic pydantic-settings \
	    anyio httpx

	# ── HuggingFace + utility packages ───────────────────────────────────────────
	# Hugging Face client libraries first, then general-purpose helpers.
	# Kept as a separate layer from the server stack so each can be cached
	# independently; --no-cache-dir keeps pip's download cache out of the layer.
	RUN pip install --no-cache-dir \
	    huggingface_hub datasets \
	    requests schedule diskcache numpy filelock

	# ── Node.js gateway ───────────────────────────────────────────────────────────
	# Only the npm manifests are copied before installing, so this layer stays
	# cached until they change. The glob also picks up package-lock.json when the
	# project has one, letting npm install honour the locked versions instead of
	# resolving fresh ones; with no lockfile it matches package.json alone, which
	# is exactly the previous behaviour.
	WORKDIR /app
	COPY package*.json ./
	RUN npm install

	# ── Copy project files ────────────────────────────────────────────────────────
	# Copies the whole build context into the current WORKDIR.
	COPY . .

	# ── Create required directories + permissions ─────────────────────────────────
	# models/, data/ and logs/ are created up front and handed to UID/GID 1000.
	# Hugging Face documents Docker Spaces as running the container under user ID
	# 1000; left root-owned, these directories would not be writable there.
	# When the container runs as root the ownership change is harmless.
	# NOTE(review): other paths under /app stay root-owned — confirm start.sh does
	# not need to write outside these three directories.
	# start.sh is made executable because it is this image's CMD.
	RUN mkdir -p /app/models /app/data /app/logs \
	    && chown -R 1000:1000 /app/models /app/data /app/logs \
	    && chmod +x start.sh

	# ── Verify all packages installed correctly ───────────────────────────────────
	# Build-time smoke test: every module is imported in its own python3 process
	# and the build aborts on the first import that fails.
	# llama_cpp is probed separately because its line also prints the version;
	# the remaining modules share one loop. ${mod} is expanded by the RUN shell.
	RUN python3 -c "import llama_cpp; print('✅ llama_cpp', llama_cpp.__version__)" \
	    && for mod in llama_cpp.server fastapi uvicorn sse_starlette huggingface_hub schedule filelock; do \
	        python3 -c "import ${mod}; print('✅ ${mod}')" || exit 1; \
	    done \
	    && echo "✅ All checks passed!"

	# Documents the port the service is expected to listen on; EXPOSE by itself
	# does not publish the port.
	# NOTE(review): 7860 is presumably the Hugging Face Spaces app port — confirm
	# that start.sh actually binds to it.
	EXPOSE 7860

	# Exec-form CMD: start.sh is executed directly, without a wrapping shell, and
	# the relative path is resolved against the WORKDIR set earlier in this file.
	CMD ["./start.sh"]