File size: 2,605 Bytes
b5b1a14
 
 
7b5e2ea
b5b1a14
 
 
 
7b5e2ea
b5b1a14
 
 
 
 
 
 
7b5e2ea
 
 
 
 
531e3de
b5b1a14
 
 
f593adc
7b5e2ea
 
dda8acb
 
51a43a5
 
 
b5b1a14
 
 
 
 
 
7b5e2ea
b5b1a14
 
7b5e2ea
 
f593adc
7b5e2ea
 
 
 
 
 
 
b5b1a14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dda8acb
51a43a5
 
b5b1a14
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
FROM python:3.11-slim

# OS-level dependencies, one per line and sorted so diffs stay readable:
#   build-essential, cmake, pkg-config, libopenblas-dev
#       toolchain and OpenBLAS headers for the from-source llama-cpp-python
#       build later in this file
#   nodejs, npm
#       used by the frontend `npm ci` / `npm run build` steps
#   curl, ffmpeg, git
#       not referenced elsewhere in this file; presumably needed by the
#       application or its Python dependencies - confirm before removing
#   ca-certificates
#       listed explicitly so HTTPS downloads keep working now that
#       recommended packages are no longer pulled in implicitly
# --no-install-recommends keeps optional "recommended" packages out of the
# image; the apt lists are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        ffmpeg \
        git \
        libopenblas-dev \
        nodejs \
        npm \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install the CPU-only PyTorch wheels from the PyTorch CPU wheel index; the
# image targets Hugging Face Spaces CPU hardware. The versions pinned here must
# stay aligned with the pins in requirements.txt.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.4.1 \
        torchaudio==2.4.1

# Python dependencies. Only the requirements file is copied at this point (the
# application source is copied much later in this file), so source-only changes
# do not invalidate this layer.
COPY wolof_voice_agent/requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --requirement /app/requirements.txt

# Compile llama-cpp-python from source inside this Debian image instead of
# using a prebuilt wheel: on Spaces the prebuilt wheel previously loaded a
# musl-linked libllama.so and failed with libc.musl-x86_64.so.1.
# CMAKE_ARGS switches on the BLAS backend with OpenBLAS as the vendor; both
# CMAKE_ARGS and FORCE_CMAKE apply to the first pip command only.
# diskcache and jinja2 are installed explicitly afterwards (unpinned).
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 \
        pip install --no-cache-dir --no-binary=llama-cpp-python \
            llama-cpp-python==0.2.90 \
    && pip install --no-cache-dir diskcache jinja2

# Hugging Face cache location and the Python import root for the application.
ENV HF_HOME=/app/wolof_voice_agent/data/cache/huggingface \
    PYTHONPATH=/app/wolof_voice_agent

# GGUF model selection. The build-time download script later in this file reads
# these: the file GGUF_SOURCE_FILENAME is fetched from the GGUF_REPO repository
# and made available under the name GGUF_FILENAME.
ENV GGUF_REPO=DevQuasar-6/soynade-research.Oolel-v0.1-GGUF \
    GGUF_SOURCE_FILENAME=soynade-research.Oolel-v0.1.Q4_K_M.gguf \
    GGUF_FILENAME=oolel-v0.1-q4_k_m.gguf

# Application switches. NOTE(review): presumably the DISABLE_* flags are read by
# the backend and the VITE_-prefixed ones by the frontend build, and
# SPACE_LLM_MAX_TOKENS caps LLM output length - confirm against the code that
# consumes them.
ENV DISABLE_TTS=1 \
    VITE_DISABLE_TTS=1 \
    DISABLE_ASR=1 \
    VITE_DISABLE_ASR=1 \
    SPACE_LLM_MAX_TOKENS=220

# Pre-create the Hugging Face cache directory (the HF_HOME path) and the
# directory that will hold the GGUF model file.
RUN mkdir -p \
        /app/wolof_voice_agent/data/cache/huggingface \
        /app/wolof_voice_agent/models/gguf

# Fetch the GGUF model at build time and expose it under its runtime filename.
# GGUF_REPO, GGUF_SOURCE_FILENAME and GGUF_FILENAME come from the ENV
# instructions earlier in this file; a missing variable raises KeyError and
# fails the build.
RUN python - <<'PYEOF'
import os
import shutil

from huggingface_hub import hf_hub_download

gguf_dir = "/app/wolof_voice_agent/models/gguf"
gguf_repo = os.environ["GGUF_REPO"]
gguf_source_file = os.environ["GGUF_SOURCE_FILENAME"]
gguf_runtime_file = os.environ["GGUF_FILENAME"]

print(f"Downloading LLM GGUF: {gguf_source_file} from {gguf_repo} ...")
src = hf_hub_download(repo_id=gguf_repo, filename=gguf_source_file, local_dir=gguf_dir)
dst = os.path.join(gguf_dir, gguf_runtime_file)
if src != dst:
    # Publish the model under the runtime name without writing its bytes a
    # second time: hard-link the downloaded file (resolved through any symlink
    # the download may have produced). Both names stay present, as before.
    # If linking is not possible (e.g. dst already exists), fall back to the
    # plain copy this step used previously.
    # NOTE(review): the size benefit assumes the image layer preserves hard
    # links; if it does not, the result is equivalent to the copy.
    try:
        os.link(os.path.realpath(src), dst)
    except OSError:
        shutil.copy2(src, dst)
print("All models ready.")
PYEOF

# Frontend build. WORKDIR is used instead of prefixing each RUN with
# `cd ... &&`; a later WORKDIR instruction in this file switches to the
# application directory before the server is started.
WORKDIR /app/wolof_voice_agent/frontend

# Copy only the npm manifests first so the `npm ci` layer is reused until
# package.json or package-lock.json changes.
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci

# Now bring in the frontend sources and produce the production bundle.
COPY frontend/ ./
RUN npm run build

# Application source is copied last so that code-only changes do not invalidate
# the dependency, model-download and frontend layers built earlier.
COPY wolof_voice_agent/ /app/wolof_voice_agent/

WORKDIR /app/wolof_voice_agent

# HF_HOME, PYTHONPATH, DISABLE_TTS, DISABLE_ASR and SPACE_LLM_MAX_TOKENS are
# already set by ENV instructions earlier in this file and persist into the
# final image, so they are not repeated here (repeating them invites the two
# copies drifting apart). Only the additional runtime setting is declared.
# HF_DATASETS_OFFLINE=1 is the Hugging Face `datasets` offline-mode switch.
ENV HF_DATASETS_OFFLINE=1

# NOTE(review): this file sets no USER, so the image keeps the base image's
# default user - confirm that matches what the deployment target expects and
# that the runtime user can write to the paths the app needs.

# Documentation of the port the server below listens on.
EXPOSE 7860

# Serve the ASGI object `app` from module `app.main` on all interfaces, port
# 7860, with a single worker process. Exec (JSON) form, so uvicorn is started
# directly rather than through a shell.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]