# Base image: the official slim Python 3.11 image.
FROM python:3.11-slim
# Build-time only: keeps apt-get non-interactive during the RUN steps of this
# stage without persisting the setting into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# Model selection: source repository, file name and local storage directory.
# These are presumably read by app.py (not visible in this file) - confirm there.
ENV MODEL_REPO=yuxinlu1/gemma-4-12B-agentic-fable5-composer2.5-v2-3.5x-tau2-GGUF \
    MODEL_FILE=gemma4-v2-Q4_K_M.gguf \
    MODEL_DIR=/data/models/gemma4-coder

# llama.cpp release tag and install location. LLAMA_VERSION, LLAMA_DIR and
# LLAMA_SERVER_BIN are consumed by the download step later in this file;
# LD_LIBRARY_PATH adds the install directory to the dynamic loader search path.
ENV LLAMA_VERSION=b9592 \
    LLAMA_DIR=/opt/llama.cpp \
    LLAMA_SERVER_BIN=/opt/llama.cpp/llama-server \
    LD_LIBRARY_PATH=/opt/llama.cpp

# Server and inference defaults. The port matches the EXPOSE instruction below.
# NOTE(review): the meaning of the literal value "default" is defined by the
# consumer of these variables (presumably app.py) - verify against it.
ENV LLAMA_HOST=0.0.0.0 \
    LLAMA_PORT=7860 \
    THREADS=4 \
    CTX_SIZE=2048 \
    BATCH_SIZE=default \
    UBATCH_SIZE=default \
    FLASH_ATTN=default \
    CACHE_TYPE_K=default \
    CACHE_TYPE_V=default \
    GPU_LAYERS=0 \
    TEMPERATURE=0.2 \
    TOP_P=0.95 \
    TOP_K=64 \
    REPEAT_PENALTY=1.08

# Tooling flags: Hugging Face transfer tuning and unbuffered Python stdout/stderr
# so log lines appear immediately in container logs.
ENV HF_XET_HIGH_PERFORMANCE=1 \
    PYTHONUNBUFFERED=1
# Install OS packages: CA certificates and curl (used by the HTTPS download
# step later in this file) plus the libgomp1 and libstdc++6 runtime libraries.
# The apt package lists are removed in the same layer so they never reach the image.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends ca-certificates curl libgomp1 libstdc++6; \
    rm -rf /var/lib/apt/lists/*
# Download the pinned llama.cpp release archive and unpack it into LLAMA_DIR.
# The archive is written to a temporary file instead of being piped into tar:
# under the default /bin/sh a pipeline's exit status is that of its last
# command, so a failed or truncated download could be hidden behind tar's
# status. The temporary file is deleted in the same layer so it adds no size.
# NOTE(review): the archive is not checksum-verified; consider pinning a
# sha256 for the chosen release.
RUN mkdir -p "${LLAMA_DIR}" \
    && curl -fL --retry 3 -o /tmp/llama.tar.gz \
        "https://github.com/ggml-org/llama.cpp/releases/download/${LLAMA_VERSION}/llama-${LLAMA_VERSION}-bin-ubuntu-x64.tar.gz" \
    && tar -xzf /tmp/llama.tar.gz --strip-components=1 -C "${LLAMA_DIR}" \
    && rm -f /tmp/llama.tar.gz \
    && chmod +x "${LLAMA_SERVER_BIN}"
# Python dependency: the Hugging Face Hub client, installed without keeping
# pip's download cache in the layer.
# NOTE(review): the package is unpinned, so rebuilds may pick up a newer
# release; pin a version once the one app.py is tested against is known.
RUN pip install --no-cache-dir huggingface_hub
# Create an unprivileged account so the service does not run as root. A fixed
# numeric UID/GID lets runtimes that enforce non-root execution verify it.
# The application directory and MODEL_DIR are created and handed to that
# account up front so it can write to them at runtime.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --create-home app \
    && mkdir -p /app "${MODEL_DIR}" \
    && chown app:app /app "${MODEL_DIR}"

WORKDIR /app
COPY --chown=app:app app.py /app/app.py

# Drop privileges for everything that runs in the container.
USER app

# Documents the listening port; it matches LLAMA_PORT set earlier in this file.
EXPOSE 7860

# Exec form, so the Python process is PID 1 and receives stop signals directly.
CMD ["python", "app.py"]