Spaces:

a-k-dey
/

grip

Running

grip / Dockerfile

github-actions

Deploy to Hugging Face

99643db 17 days ago

1.89 kB

	# Base image: the official slim Python 3.11 runtime.
	FROM python:3.11-slim

	# Runtime settings for Python and the service port.
	#   PYTHONDONTWRITEBYTECODE=1  - do not write .pyc files
	#   PYTHONUNBUFFERED=1         - unbuffered stdout/stderr so logs show up immediately
	#   PORT=7860                  - same port that EXPOSE and the uvicorn command use
	# Written in key=value form; the space-separated "ENV key value" form is deprecated.
	ENV PYTHONDONTWRITEBYTECODE=1 \
	PYTHONUNBUFFERED=1 \
	PORT=7860

	# All following relative paths (COPY targets, RUN commands) resolve against /app.
	WORKDIR /app

	# OS-level packages: compiler toolchain and native libraries.
	# Packages are listed alphabetically, one per line, to keep diffs small.
	# The apt package lists are removed in the same layer so they never reach the image.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	build-essential \
	cmake \
	libgomp1 \
	libopenblas-dev \
	libpq-dev \
	pkg-config \
	&& rm -rf /var/lib/apt/lists/*

	# Stability settings for Stan (Prophet) and llama-cpp:
	# limit the OpenMP, MKL and OpenBLAS thread pools to a single thread each, and
	# set KMP_DUPLICATE_LIB_OK so a second copy of the OpenMP runtime being loaded
	# is tolerated rather than treated as fatal.
	# Written in key=value form; the space-separated "ENV key value" form is deprecated.
	ENV OMP_NUM_THREADS=1 \
	MKL_NUM_THREADS=1 \
	OPENBLAS_NUM_THREADS=1 \
	KMP_DUPLICATE_LIB_OK=TRUE

	# Python packages.
	# Only requirements.txt is copied at this point, so these layers are rebuilt
	# when the requirements change rather than on every source edit.
	COPY requirements.txt ./
	# Bring the packaging toolchain up to date first, then install the pinned set.
	RUN pip install --no-cache-dir --upgrade pip setuptools wheel
	RUN pip install --no-cache-dir --requirement requirements.txt

	# Install llama-cpp-python (latest release).
	# It is compiled from source because pre-built glibc wheels aren't always available.
	# CRITICAL FIX for OOM (exit code 137): llama-cpp-python builds with Ninja, which
	# ignores MAKEFLAGS, so CMAKE_BUILD_PARALLEL_LEVEL=1 MUST be set to limit the
	# build to a single job.
	# CMAKE_ARGS and CMAKE_BUILD_PARALLEL_LEVEL matter only while compiling, so they
	# are scoped to this RUN instead of being baked into the runtime environment via ENV.
	RUN CMAKE_ARGS="-DGGML_CPU=ON" CMAKE_BUILD_PARALLEL_LEVEL=1 \
	pip install --no-cache-dir --upgrade llama-cpp-python

	# Bake the model file into the image so the app starts on HF Spaces without
	# having to download it first.
	# Model: Gemma 4 E4B (Instruct-GGUF), Q4_K_M file, ~2.5GB.
	# This step runs before the application source is copied, so source edits do
	# not invalidate the cached download layer.
	RUN mkdir -p models \
	&& python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='bartowski/google_gemma-4-E4B-it-GGUF', filename='google_gemma-4-E4B-it-Q4_K_M.gguf', local_dir='models')"

	# Run as an unprivileged account instead of root.
	# Hugging Face Spaces runs Docker containers as UID 1000, so the account is
	# created with that UID to give the process a passwd entry and a writable HOME.
	# /app itself is handed to the account (non-recursive, so the large model layer
	# is not duplicated); files already under /app stay root-owned and readable.
	RUN useradd --create-home --uid 1000 user \
	&& chown user:user /app
	ENV HOME=/home/user

	# Copy the rest of the application code, owned by the runtime account.
	COPY --chown=user:user . .

	# Everything from here on, including the container process, runs unprivileged.
	USER user

	# Document the port the app listens on (matches PORT and the uvicorn --port below).
	EXPOSE 7860

	# Start the application with uvicorn, exec form so uvicorn is PID 1 and receives signals.
	# Kept at a single worker: reverted to 1 worker for debugging startup hangs on HF Spaces.
	CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]