Spaces:

GaindeNdiaye
/

khAdI

Sleeping

khAdI / Dockerfile

Mouhamed Naby NDIAYE

Run Space as text-only LLM demo

51a43a5 about 1 month ago

2.61 kB

	FROM python:3.11-slim

	# OS-level dependencies:
	#   - build-essential, cmake, pkg-config, libopenblas-dev: toolchain for the
	#     from-source llama-cpp-python build (OpenBLAS backend) further down.
	#   - nodejs, npm: used to install and build the frontend.
	#   - ffmpeg, git, curl: general tooling kept from the original package list.
	#   - ca-certificates: listed explicitly because --no-install-recommends stops
	#     apt from pulling it in as a recommended package, and HTTPS downloads
	#     (pip, npm, Hugging Face Hub) need it.
	# update + install + list cleanup stay in one layer so the apt index is never
	# stale and never shipped in the image.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	      build-essential \
	      ca-certificates \
	      cmake \
	      curl \
	      ffmpeg \
	      git \
	      libopenblas-dev \
	      nodejs \
	      npm \
	      pkg-config \
	    && rm -rf /var/lib/apt/lists/*

	WORKDIR /app

	# Install the CPU-only PyTorch wheels (Hugging Face Spaces CPU hardware) from
	# the PyTorch CPU wheel index. These pins must stay in step with the versions
	# pinned in requirements.txt.
	RUN pip install \
	      --index-url https://download.pytorch.org/whl/cpu \
	      --no-cache-dir \
	      torch==2.4.1 \
	      torchaudio==2.4.1

	# Copy only the requirements manifest before installing, so this dependency
	# layer is rebuilt only when that file changes.
	COPY wolof_voice_agent/requirements.txt requirements.txt
	RUN pip install --no-cache-dir --requirement requirements.txt

	# Compile llama-cpp-python from source inside this Debian image instead of
	# using the prebuilt wheel: on Spaces the wheel previously loaded a
	# musl-linked libllama.so and failed with libc.musl-x86_64.so.1.
	# CMAKE_ARGS / FORCE_CMAKE apply to the first pip invocation only.
	RUN FORCE_CMAKE=1 \
	    CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
	      pip install --no-cache-dir --no-binary=llama-cpp-python \
	        llama-cpp-python==0.2.90 \
	    && pip install --no-cache-dir diskcache jinja2

	# Configuration shared by the remaining build steps and the running container.
	#   HF_HOME / PYTHONPATH    - Hugging Face cache location and Python import root.
	#   GGUF_*                  - read by the build-time model download script below:
	#                             source repo, source file name, and the file name
	#                             the model is copied to.
	#   DISABLE_* / VITE_DISABLE_* - TTS/ASR switches; the VITE_ variants are
	#                             presumably consumed by the frontend build (confirm).
	#   SPACE_LLM_MAX_TOKENS    - presumably a generation length cap read by the app
	#                             (confirm against the application code).
	ENV HF_HOME=/app/wolof_voice_agent/data/cache/huggingface \
	    PYTHONPATH=/app/wolof_voice_agent \
	    GGUF_REPO=DevQuasar-6/soynade-research.Oolel-v0.1-GGUF \
	    GGUF_SOURCE_FILENAME=soynade-research.Oolel-v0.1.Q4_K_M.gguf \
	    GGUF_FILENAME=oolel-v0.1-q4_k_m.gguf \
	    DISABLE_TTS=1 \
	    VITE_DISABLE_TTS=1 \
	    DISABLE_ASR=1 \
	    VITE_DISABLE_ASR=1 \
	    SPACE_LLM_MAX_TOKENS=220

	# Pre-create the GGUF model directory and the Hugging Face cache directory.
	RUN mkdir -p \
	      /app/wolof_voice_agent/models/gguf \
	      /app/wolof_voice_agent/data/cache/huggingface

	# Download the GGUF weights at build time so they are baked into the image.
	# `<<-` (instead of `<<`) strips the leading tab from every heredoc line and
	# from the terminator, so the script may be tab-indented like the rest of this
	# file. Indentation *inside* the Python code must therefore use spaces.
	RUN python - <<-'PYEOF'
	import os
	import shutil
	from huggingface_hub import hf_hub_download

	# Repo and file names come from the ENV instructions earlier in this file.
	gguf_dir = "/app/wolof_voice_agent/models/gguf"
	gguf_repo = os.environ["GGUF_REPO"]
	gguf_source_file = os.environ["GGUF_SOURCE_FILENAME"]
	gguf_runtime_file = os.environ["GGUF_FILENAME"]
	print(f"Downloading LLM GGUF: {gguf_source_file} from {gguf_repo} ...")
	src = hf_hub_download(repo_id=gguf_repo, filename=gguf_source_file, local_dir=gguf_dir)
	# Make the model available under the runtime file name as well.
	# NOTE(review): this keeps two copies of the weights in the layer; switch to a
	# move only after confirming nothing reads the source file name at runtime.
	dst = os.path.join(gguf_dir, gguf_runtime_file)
	if src != dst:
	    shutil.copy2(src, dst)
	print("All models ready.")
	PYEOF

	# Frontend build. WORKDIR is used instead of `cd` inside RUN; the following
	# COPY uses an absolute destination and WORKDIR is set again afterwards, so
	# this does not affect later instructions.
	WORKDIR /app/wolof_voice_agent/frontend

	# Copy only the manifest and lockfile first so the `npm ci` layer is reused
	# until the dependency set changes.
	COPY frontend/package.json frontend/package-lock.json ./
	RUN npm ci

	# Bring in the remaining frontend sources and build.
	COPY frontend/ ./
	RUN npm run build

	# Copy the application sources last, after dependency installation, the model
	# download and the frontend build, so source edits do not invalidate those
	# layers. The destination is absolute on purpose: it must not depend on the
	# current WORKDIR.
	COPY wolof_voice_agent/ /app/wolof_voice_agent/

	# Directory from which the server is started by CMD.
	WORKDIR /app/wolof_voice_agent

	# HF_HOME, PYTHONPATH, DISABLE_TTS, DISABLE_ASR and SPACE_LLM_MAX_TOKENS are
	# already set, with the same values, by the ENV instructions earlier in this
	# file; ENV values persist through the rest of the build and into the running
	# container, so they are not repeated here. Only the new variable is added.
	ENV HF_DATASETS_OFFLINE=1

	# Documents the port the server listens on; it matches the --port value
	# passed to uvicorn below.
	EXPOSE 7860

	# Start uvicorn in exec form, serving app.main:app on all interfaces with a
	# single worker process.
	# NOTE(review): no USER instruction is visible in this file, so the process
	# runs as the image's default user unless the platform overrides it — confirm.
	CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]