# syntax=docker/dockerfile:1

# Image for the Wolof voice agent: a FastAPI/uvicorn backend plus a prebuilt
# frontend bundle, with the GGUF LLM weights baked in at build time.
# The syntax directive above selects a BuildKit frontend that understands the
# here-document used by the model download step further down.

FROM python:3.11-slim

# System packages.
#   build-essential, cmake, pkg-config, libopenblas-dev: toolchain and BLAS
#     headers for the from-source llama-cpp-python build below.
#   nodejs, npm: build the frontend bundle.
#   ca-certificates: TLS roots for the pip / Hugging Face downloads.
#   curl, ffmpeg, git: kept from the original package list
#     (NOTE(review): presumably runtime/audio tooling; confirm they are needed).
# --no-install-recommends keeps optional extras out of the image, and the apt
# lists are removed in the same layer so they never reach the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        ffmpeg \
        git \
        libopenblas-dev \
        nodejs \
        npm \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# CPU torch for Hugging Face Spaces CPU. Keep this aligned with requirements pins.
RUN pip install --no-cache-dir \
        torch==2.4.1 torchaudio==2.4.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Copy only the requirements manifest first so the dependency layer stays
# cached until the pins themselves change.
COPY wolof_voice_agent/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Build llama-cpp-python inside Debian. The prebuilt wheel previously loaded a
# musl-linked libllama.so on Spaces and failed with libc.musl-x86_64.so.1.
# --no-binary forces the sdist; CMAKE_ARGS enables the OpenBLAS backend.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
    FORCE_CMAKE=1 \
    pip install --no-cache-dir --no-binary llama-cpp-python llama-cpp-python==0.2.90 && \
    pip install --no-cache-dir diskcache jinja2

# Configuration shared by the build steps below and the running container.
# The GGUF_* values drive the model download step; the VITE_* values are
# defined before `npm run build` (presumably read by the frontend build;
# confirm against the frontend config).
ENV HF_HOME=/app/wolof_voice_agent/data/cache/huggingface \
    PYTHONPATH=/app/wolof_voice_agent \
    GGUF_REPO=DevQuasar-6/soynade-research.Oolel-v0.1-GGUF \
    GGUF_SOURCE_FILENAME=soynade-research.Oolel-v0.1.Q4_K_M.gguf \
    GGUF_FILENAME=oolel-v0.1-q4_k_m.gguf \
    DISABLE_TTS=1 \
    VITE_DISABLE_TTS=1 \
    DISABLE_ASR=1 \
    VITE_DISABLE_ASR=1 \
    SPACE_LLM_MAX_TOKENS=220

RUN mkdir -p /app/wolof_voice_agent/data/cache/huggingface \
             /app/wolof_voice_agent/models/gguf

# Download the GGUF weights at build time and expose them under the runtime
# filename (GGUF_FILENAME) next to the upstream filename.
RUN python - <<'PYEOF'
import os
import shutil

from huggingface_hub import hf_hub_download

gguf_dir = "/app/wolof_voice_agent/models/gguf"
gguf_repo = os.environ["GGUF_REPO"]
gguf_source_file = os.environ["GGUF_SOURCE_FILENAME"]
gguf_runtime_file = os.environ["GGUF_FILENAME"]

print(f"Downloading LLM GGUF: {gguf_source_file} from {gguf_repo} ...")
src = hf_hub_download(repo_id=gguf_repo, filename=gguf_source_file, local_dir=gguf_dir)

dst = os.path.join(gguf_dir, gguf_runtime_file)
if src != dst:
    # Prefer a hard link so the weights are not stored twice in this layer;
    # fall back to a real copy when linking is not possible (for example the
    # destination already exists or the filesystem refuses hard links).
    try:
        os.link(src, dst)
    except OSError:
        shutil.copy2(src, dst)

print("All models ready.")
PYEOF

# Frontend: install dependencies from the lockfile first (cached until the
# manifests change), then copy the sources and build the bundle.
WORKDIR /app/wolof_voice_agent/frontend
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
COPY frontend/ ./
RUN npm run build

# Backend sources go last because they change most often.
COPY wolof_voice_agent/ /app/wolof_voice_agent/
WORKDIR /app/wolof_voice_agent

# Set only after the build-time download step above has completed.
ENV HF_DATASETS_OFFLINE=1

# NOTE(review): no USER instruction is set, so the image's default user is
# root; confirm whether the target runtime needs a non-root user with write
# access to HF_HOME.
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]