# Image that layers a prebuilt llama.cpp release and the huggingface_hub
# Python client on top of python:3.11-slim, then starts app.py.
FROM python:3.11-slim

# General process environment.
# NOTE(review): DEBIAN_FRONTEND set through ENV also persists into the running
# container; it is kept here so the container environment stays unchanged.
ENV DEBIAN_FRONTEND=noninteractive \
    HF_XET_HIGH_PERFORMANCE=1 \
    PYTHONUNBUFFERED=1

# Model coordinates. Nothing in this Dockerfile reads them; presumably they are
# consumed by app.py at start-up -- verify against app.py.
ENV MODEL_REPO=yuxinlu1/gemma-4-12B-agentic-fable5-composer2.5-v2-3.5x-tau2-GGUF \
    MODEL_FILE=gemma4-v2-Q4_K_M.gguf \
    MODEL_DIR=/data/models/gemma4-coder

# llama.cpp release tag and install directory (both used by the download step
# below).
ENV LLAMA_VERSION=b9592 \
    LLAMA_DIR=/opt/llama.cpp

# Derived from LLAMA_DIR. This must be a separate ENV instruction: variables
# assigned inside one ENV instruction are not visible to other values in that
# same instruction. LD_LIBRARY_PATH puts the install directory on the dynamic
# loader's search path.
ENV LLAMA_SERVER_BIN=${LLAMA_DIR}/llama-server \
    LD_LIBRARY_PATH=${LLAMA_DIR}

# Server and inference settings. Not referenced in this Dockerfile; presumably
# read by app.py (including how the literal value "default" is interpreted) --
# verify against app.py.
ENV LLAMA_HOST=0.0.0.0 \
    LLAMA_PORT=7860 \
    THREADS=4 \
    CTX_SIZE=2048 \
    BATCH_SIZE=default \
    UBATCH_SIZE=default \
    FLASH_ATTN=default \
    CACHE_TYPE_K=default \
    CACHE_TYPE_V=default \
    GPU_LAYERS=0 \
    TEMPERATURE=0.2 \
    TOP_P=0.95 \
    TOP_K=64 \
    REPEAT_PENALTY=1.08

# System packages: curl and CA certificates for the HTTPS download below, plus
# the OpenMP and C++ runtime libraries (presumably needed by the prebuilt
# llama.cpp binaries -- verify). The apt package lists are removed in the same
# layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgomp1 \
        libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# Fetch and unpack the llama.cpp release archive into LLAMA_DIR.
# The archive is saved to a temporary file rather than piped into tar: RUN uses
# /bin/sh -c without pipefail, so in a `curl | tar` pipeline a curl failure
# would not by itself fail the build. Here every step is chained with && and
# the archive is deleted in the same layer.
RUN mkdir -p "${LLAMA_DIR}" \
    && curl -fL -o /tmp/llama.tar.gz \
        "https://github.com/ggml-org/llama.cpp/releases/download/${LLAMA_VERSION}/llama-${LLAMA_VERSION}-bin-ubuntu-x64.tar.gz" \
    && tar -xzf /tmp/llama.tar.gz --strip-components=1 -C "${LLAMA_DIR}" \
    && rm -f /tmp/llama.tar.gz \
    && chmod +x "${LLAMA_SERVER_BIN}"

# Python dependency of the application.
# NOTE(review): the version is unpinned, so rebuilds may pick up a different
# release; consider pinning once a known-good version is chosen.
RUN pip install --no-cache-dir \
        huggingface_hub

# Application code.
WORKDIR /app
COPY app.py /app/app.py

# Same value as LLAMA_PORT above.
EXPOSE 7860
CMD ["python", "app.py"]