FROM python:3.11-slim

# ── System dependencies ───────────────────────────────────────────────────────
# --no-install-recommends keeps optional "recommended" packages out of the image.
# ca-certificates is listed explicitly so HTTPS downloads (curl, wget, git, pip)
# do not rely on it being pulled in as a recommendation of another package.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        libgomp1 \
        libopenblas-dev \
        nodejs \
        npm \
        supervisor \
        wget \
    && rm -rf /var/lib/apt/lists/*

# ── llama-cpp-python ──────────────────────────────────────────────────────────
# IMPORTANT: Do NOT set CMAKE_ARGS here — that forces a full C++ source compile
# which takes 15–30 min and times out on HF Spaces.
# The prebuilt CPU wheels below are already AVX2/AVX512-optimized.
# Real-world speedup comes from using a smaller model (see start.sh), not BLAS.
# --prefer-binary makes pip pick a prebuilt wheel from the extra index even when
# PyPI carries a newer source-only release, so the build does not silently fall
# back to compiling from source.
RUN pip install --no-cache-dir --prefer-binary \
        "llama-cpp-python[server]" \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu

# ── Server sub-dependencies (explicit safety net) ─────────────────────────────
# Installed explicitly in case the [server] extra above does not bring them in.
RUN pip install --no-cache-dir \
        sse-starlette \
        starlette \
        fastapi \
        "uvicorn[standard]" \
        pydantic \
        pydantic-settings \
        anyio \
        httpx

# ── HuggingFace + utility packages ────────────────────────────────────────────
RUN pip install --no-cache-dir \
        huggingface_hub \
        datasets \
        requests \
        schedule \
        diskcache \
        numpy \
        filelock

# ── Node.js gateway ───────────────────────────────────────────────────────────
# package.json is copied on its own so the npm layer stays cached until the
# manifest changes. `npm install` is kept (rather than `npm ci`) because no
# lockfile is copied here. The npm download cache is dropped in the same layer.
WORKDIR /app
COPY package.json ./
RUN npm install \
    && npm cache clean --force

# ── Copy project files ────────────────────────────────────────────────────────
COPY . .
# ── Runtime directories and launcher permissions ──────────────────────────────
# Creates the model, data and log directories under /app and marks start.sh
# as executable.
RUN mkdir -p /app/models /app/data /app/logs \
    && chmod +x start.sh

# ── Verify all packages installed correctly ───────────────────────────────────
# Build-time smoke test: each module is imported in its own interpreter and the
# build aborts (set -e) on the first import that fails.
RUN set -e; \
    python3 -c "import llama_cpp; print('✅ llama_cpp', llama_cpp.__version__)"; \
    for mod in llama_cpp.server fastapi uvicorn sse_starlette huggingface_hub schedule filelock; do \
        python3 -c "import ${mod}; print('✅ ${mod}')"; \
    done; \
    echo "✅ All checks passed!"

# ── Runtime entry point ───────────────────────────────────────────────────────
# EXPOSE only documents the port; start.sh is launched in exec form.
EXPOSE 7860

CMD ["./start.sh"]