# Spaces: Sleeping
# File size: 2,605 Bytes
# Base image: slim Python 3.11.
FROM python:3.11-slim

# System packages needed by later steps in this file:
#   - build-essential, cmake, pkg-config, libopenblas-dev: compiling
#     llama-cpp-python from source with the OpenBLAS backend
#   - nodejs, npm: installing and building the frontend
#   - ffmpeg, git, curl: general tooling (their consumers are not visible in
#     this file)
# --no-install-recommends keeps apt from pulling optional extras into the
# image; the package index is removed in the same layer so it is not stored.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        ffmpeg \
        git \
        libopenblas-dev \
        nodejs \
        npm \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# Install the CPU-only PyTorch wheels (the target is Hugging Face Spaces CPU
# hardware). The versions here must stay in step with the pins in
# requirements.txt.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.4.1 \
        torchaudio==2.4.1

# Copy only the requirements manifest before installing, so this layer is
# rebuilt only when the manifest itself changes.
COPY wolof_voice_agent/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir --requirement requirements.txt
# Compile llama-cpp-python from source inside this Debian image instead of
# using a prebuilt wheel: the wheel previously shipped a musl-linked
# libllama.so that failed on Spaces with "libc.musl-x86_64.so.1".
# CMAKE_ARGS turns on the OpenBLAS backend; the variables prefix only the
# first pip command.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 \
        pip install --no-cache-dir --no-binary llama-cpp-python llama-cpp-python==0.2.90 \
    && pip install --no-cache-dir diskcache jinja2
# Environment shared by the remaining build steps and the running container:
# Hugging Face cache location, Python import root, the GGUF model coordinates
# (source repo/file and the filename used at runtime), TTS/ASR feature
# switches, and the LLM token limit.
# NOTE(review): the VITE_* variants are presumably read by the frontend build;
# confirm against the frontend sources.
ENV HF_HOME=/app/wolof_voice_agent/data/cache/huggingface \
    PYTHONPATH=/app/wolof_voice_agent \
    GGUF_REPO=DevQuasar-6/soynade-research.Oolel-v0.1-GGUF \
    GGUF_SOURCE_FILENAME=soynade-research.Oolel-v0.1.Q4_K_M.gguf \
    GGUF_FILENAME=oolel-v0.1-q4_k_m.gguf \
    DISABLE_TTS=1 \
    VITE_DISABLE_TTS=1 \
    DISABLE_ASR=1 \
    VITE_DISABLE_ASR=1 \
    SPACE_LLM_MAX_TOKENS=220

# Create the Hugging Face cache directory and the GGUF model directory.
RUN mkdir -p \
        /app/wolof_voice_agent/data/cache/huggingface \
        /app/wolof_voice_agent/models/gguf
# Download the GGUF weights at build time and expose them under the runtime
# filename (GGUF_FILENAME). Reads GGUF_REPO, GGUF_SOURCE_FILENAME and
# GGUF_FILENAME from the environment; a missing variable raises KeyError and
# fails the build.
RUN python - <<'PYEOF'
import os

from huggingface_hub import hf_hub_download

gguf_dir = "/app/wolof_voice_agent/models/gguf"
gguf_repo = os.environ["GGUF_REPO"]
gguf_source_file = os.environ["GGUF_SOURCE_FILENAME"]
gguf_runtime_file = os.environ["GGUF_FILENAME"]

print(f"Downloading LLM GGUF: {gguf_source_file} from {gguf_repo} ...")
src = hf_hub_download(repo_id=gguf_repo, filename=gguf_source_file, local_dir=gguf_dir)
dst = os.path.join(gguf_dir, gguf_runtime_file)

if src != dst:
    # Link rather than copy: a second full copy of the weights would be stored
    # in this same layer. A relative symlink keeps both filenames resolvable.
    if os.path.lexists(dst):
        os.remove(dst)
    os.symlink(os.path.relpath(src, gguf_dir), dst)

print("All models ready.")
PYEOF
# Frontend build. WORKDIR (which also creates the directory) replaces the
# repeated `cd ... &&` prefixes.
WORKDIR /app/wolof_voice_agent/frontend

# Copy only the manifest and lockfile first so the `npm ci` layer is reused
# until either of them changes.
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci

# Bring in the rest of the frontend sources and run the package's build script.
COPY frontend/ ./
RUN npm run build
# Copy the application source and make it the working directory for the
# server process.
COPY wolof_voice_agent/ /app/wolof_voice_agent/
WORKDIR /app/wolof_voice_agent

# HF_HOME, PYTHONPATH, DISABLE_TTS, DISABLE_ASR and SPACE_LLM_MAX_TOKENS are
# already set earlier in this stage with identical values, and ENV persists
# into the final image, so they are not repeated. Only the new variable is
# declared here.
ENV HF_DATASETS_OFFLINE=1

# The server listens on 7860 on all interfaces, with a single uvicorn worker.
EXPOSE 7860
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]