khAdI / Dockerfile
Mouhamed Naby NDIAYE
Run Space as text-only LLM demo
51a43a5
Raw
History Blame Contribute Delete
2.61 kB
# Base image: slim Python 3.11.
FROM python:3.11-slim

# OS packages.
#   - nodejs/npm: used by the frontend `npm ci` / `npm run build` steps below.
#   - build-essential/cmake/pkg-config/libopenblas-dev: toolchain for the
#     from-source llama-cpp-python build (OpenBLAS backend) below.
#   - ffmpeg/git/curl: kept from the original list; their consumers are not
#     visible in this file.
# --no-install-recommends keeps optional "recommended" packages out of the
# image. ca-certificates is listed explicitly so HTTPS for curl/git/npm does not
# depend on it arriving as a recommendation.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        ffmpeg \
        git \
        libopenblas-dev \
        nodejs \
        npm \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*
# Relative paths in the steps that follow resolve against /app.
WORKDIR /app

# CPU-only PyTorch wheels (for Hugging Face Spaces CPU), taken from the PyTorch
# CPU wheel index. Keep these pins in step with the pins in requirements.txt.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.4.1 \
        torchaudio==2.4.1

# Only the requirements file is copied at this point, so this install layer is
# independent of the rest of the backend sources (copied much later).
COPY wolof_voice_agent/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir --requirement requirements.txt
# Compile llama-cpp-python from source inside this Debian image instead of using
# a prebuilt wheel: on Spaces the wheel previously loaded a musl-linked
# libllama.so and failed with libc.musl-x86_64.so.1.
# CMAKE_ARGS (OpenBLAS backend) and FORCE_CMAKE apply to the first pip
# invocation only; the second one installs diskcache and jinja2 unpinned.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 \
        pip install --no-cache-dir \
            --no-binary llama-cpp-python \
            llama-cpp-python==0.2.90 \
    && pip install --no-cache-dir \
        diskcache \
        jinja2
# Configuration shared by the remaining build steps and the running container.
#   HF_HOME / PYTHONPATH     : Hugging Face cache location and import root.
#   GGUF_*                   : read by the model download script further down
#                              (repo, file name in the repo, file name on disk).
#   DISABLE_* / VITE_DISABLE_* / SPACE_LLM_MAX_TOKENS :
#                              consumers are not visible in this file; the
#                              VITE_ variants are presumably picked up by the
#                              frontend build -- TODO confirm.
ENV HF_HOME=/app/wolof_voice_agent/data/cache/huggingface \
    PYTHONPATH=/app/wolof_voice_agent \
    GGUF_REPO=DevQuasar-6/soynade-research.Oolel-v0.1-GGUF \
    GGUF_SOURCE_FILENAME=soynade-research.Oolel-v0.1.Q4_K_M.gguf \
    GGUF_FILENAME=oolel-v0.1-q4_k_m.gguf \
    DISABLE_TTS=1 \
    VITE_DISABLE_TTS=1 \
    DISABLE_ASR=1 \
    VITE_DISABLE_ASR=1 \
    SPACE_LLM_MAX_TOKENS=220

# Create the Hugging Face cache directory and the GGUF model directory up front.
RUN mkdir -p \
        /app/wolof_voice_agent/data/cache/huggingface \
        /app/wolof_voice_agent/models/gguf
# Download the GGUF model at build time and store it under the runtime file
# name. Repo and file names come from the GGUF_REPO, GGUF_SOURCE_FILENAME and
# GGUF_FILENAME environment variables set earlier in this file.
RUN python - <<'PYEOF'
import os
import shutil

from huggingface_hub import hf_hub_download

gguf_dir = "/app/wolof_voice_agent/models/gguf"
gguf_repo = os.environ["GGUF_REPO"]
gguf_source_file = os.environ["GGUF_SOURCE_FILENAME"]
gguf_runtime_file = os.environ["GGUF_FILENAME"]

print(f"Downloading LLM GGUF: {gguf_source_file} from {gguf_repo} ...")
src = hf_hub_download(repo_id=gguf_repo, filename=gguf_source_file, local_dir=gguf_dir)

# Expose the model under the runtime file name. The source-named file is left in
# place.
# NOTE(review): when the two names differ (they do with the values set in this
# file) the layer may hold the model twice; if nothing reads the source-named
# file, a rename would halve that -- confirm before changing.
dst = os.path.join(gguf_dir, gguf_runtime_file)
if src != dst:
    shutil.copy2(src, dst)

print("All models ready.")
PYEOF
# Frontend build. WORKDIR replaces the repeated `cd ... &&` prefixes; the
# directory is created if missing and applies to every step in this section.
WORKDIR /app/wolof_voice_agent/frontend

# Copy the npm manifests on their own first so the `npm ci` layer is reused
# until package.json / package-lock.json change.
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci

# Then bring in the rest of the frontend sources and build them.
COPY frontend/ ./
RUN npm run build
# Backend sources are copied after the dependency, model and frontend steps,
# then the working directory moves to the backend root.
COPY wolof_voice_agent/ /app/wolof_voice_agent/
WORKDIR /app/wolof_voice_agent

# Runtime environment. Apart from HF_DATASETS_OFFLINE, these entries repeat
# values already set earlier in this file.
ENV HF_HOME=/app/wolof_voice_agent/data/cache/huggingface \
    PYTHONPATH=/app/wolof_voice_agent \
    HF_DATASETS_OFFLINE=1 \
    DISABLE_TTS=1 \
    DISABLE_ASR=1 \
    SPACE_LLM_MAX_TOKENS=220

# The server listens on 7860 (EXPOSE only documents the port).
EXPOSE 7860

# Start the ASGI app `app.main:app` with a single uvicorn worker on all interfaces.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]