| FROM python:3.11-slim | |
| # ββ System dependencies βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| RUN apt-get update && apt-get install -y \ | |
| curl \ | |
| wget \ | |
| git \ | |
| nodejs \ | |
| npm \ | |
| build-essential \ | |
| cmake \ | |
| supervisor \ | |
| libopenblas-dev \ | |
| libgomp1 \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # ββ llama-cpp-python ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # IMPORTANT: Do NOT set CMAKE_ARGS here β that forces a full C++ source compile | |
| # which takes 15β30 min and times out on HF Spaces. | |
| # The prebuilt CPU wheels below are already AVX2/AVX512-optimized. | |
| # Real-world speedup comes from using a smaller model (see start.sh), not BLAS. | |
| RUN pip install --no-cache-dir \ | |
| "llama-cpp-python[server]" \ | |
| --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu | |
| # ββ Server sub-dependencies (explicit safety net) βββββββββββββββββββββββββββββ | |
| RUN pip install --no-cache-dir \ | |
| sse-starlette \ | |
| starlette \ | |
| fastapi \ | |
| "uvicorn[standard]" \ | |
| pydantic \ | |
| pydantic-settings \ | |
| anyio \ | |
| httpx | |
| # ββ HuggingFace + utility packages βββββββββββββββββββββββββββββββββββββββββββ | |
| RUN pip install --no-cache-dir \ | |
| huggingface_hub \ | |
| datasets \ | |
| requests \ | |
| schedule \ | |
| diskcache \ | |
| numpy \ | |
| filelock | |
| # ββ Node.js gateway βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| WORKDIR /app | |
| COPY package.json . | |
| RUN npm install | |
| # ββ Copy project files ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| COPY . . | |
| # ββ Create required directories βββββββββββββββββββββββββββββββββββββββββββββββ | |
| RUN mkdir -p /app/models /app/data /app/logs | |
| # ββ Permissions βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| RUN chmod +x start.sh | |
| # ββ Verify all packages installed correctly βββββββββββββββββββββββββββββββββββ | |
| RUN python3 -c "import llama_cpp; print('β llama_cpp', llama_cpp.__version__)" && \ | |
| python3 -c "import llama_cpp.server; print('β llama_cpp.server')" && \ | |
| python3 -c "import fastapi; print('β fastapi')" && \ | |
| python3 -c "import uvicorn; print('β uvicorn')" && \ | |
| python3 -c "import sse_starlette; print('β sse_starlette')" && \ | |
| python3 -c "import huggingface_hub; print('β huggingface_hub')" && \ | |
| python3 -c "import schedule; print('β schedule')" && \ | |
| python3 -c "import filelock; print('β filelock')" && \ | |
| echo "β All checks passed!" | |
| EXPOSE 7860 | |
| CMD ["./start.sh"] |