# Spaces: Running
# Image that launches app.py on python:3.11-slim, with a prebuilt llama.cpp
# release unpacked under /opt/llama.cpp.
FROM python:3.11-slim

# Build-time only: keeps apt-get non-interactive during the build without
# persisting the setting into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# Model source and on-disk location.
# NOTE(review): presumably read by app.py (not visible here) — confirm.
ENV MODEL_REPO=yuxinlu1/gemma-4-12B-agentic-fable5-composer2.5-v2-3.5x-tau2-GGUF \
    MODEL_FILE=gemma4-v2-Q4_K_M.gguf \
    MODEL_DIR=/data/models/gemma4-coder

# llama.cpp release tag and install paths. LLAMA_VERSION, LLAMA_DIR and
# LLAMA_SERVER_BIN are used by the download step below; LD_LIBRARY_PATH points
# at the same directory the release archive is extracted into.
ENV LLAMA_VERSION=b9592 \
    LLAMA_DIR=/opt/llama.cpp \
    LLAMA_SERVER_BIN=/opt/llama.cpp/llama-server \
    LD_LIBRARY_PATH=/opt/llama.cpp

# Server and sampling settings.
# NOTE(review): the literal value "default" is presumably interpreted by
# app.py as "do not override the llama-server default" — confirm.
ENV LLAMA_HOST=0.0.0.0 \
    LLAMA_PORT=7860 \
    THREADS=4 \
    CTX_SIZE=2048 \
    BATCH_SIZE=default \
    UBATCH_SIZE=default \
    FLASH_ATTN=default \
    CACHE_TYPE_K=default \
    CACHE_TYPE_V=default \
    GPU_LAYERS=0 \
    TEMPERATURE=0.2 \
    TOP_P=0.95 \
    TOP_K=64 \
    REPEAT_PENALTY=1.08

# Python / Hugging Face client behaviour.
ENV HF_XET_HIGH_PERFORMANCE=1 \
    PYTHONUNBUFFERED=1

# OS packages: curl + CA certificates for the HTTPS download below.
# NOTE(review): libgomp1 / libstdc++6 are presumably runtime dependencies of
# the prebuilt llama.cpp binaries — confirm.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgomp1 \
        libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# Fetch and unpack the pinned llama.cpp release.
# The archive is downloaded to a temporary file instead of being piped into
# tar: with `curl | tar` under /bin/sh only tar's exit status is checked, so a
# failed download is not reported as such. Here each step is chained with &&,
# and the archive is deleted in the same layer so it adds nothing to the image.
RUN mkdir -p "${LLAMA_DIR}" \
    && curl -fL -o /tmp/llama.tar.gz \
        "https://github.com/ggml-org/llama.cpp/releases/download/${LLAMA_VERSION}/llama-${LLAMA_VERSION}-bin-ubuntu-x64.tar.gz" \
    && tar -xzf /tmp/llama.tar.gz --strip-components=1 -C "${LLAMA_DIR}" \
    && rm -f /tmp/llama.tar.gz \
    && chmod +x "${LLAMA_SERVER_BIN}"

# Python dependency installed for the application (version is not pinned).
RUN pip install --no-cache-dir \
        huggingface_hub

WORKDIR /app
COPY app.py /app/app.py

# Same port number as LLAMA_PORT above.
EXPOSE 7860

CMD ["python", "app.py"]