# Use an official Python runtime as a parent image
FROM python:3.11-slim

# Runtime environment for the Python process:
#   PYTHONDONTWRITEBYTECODE=1  stop Python from writing .pyc files
#   PYTHONUNBUFFERED=1         unbuffered stdout/stderr, so logs appear immediately
#   PORT=7860                  same port number that EXPOSE and CMD use further down
# Written in key=value form: the space-separated `ENV key value` syntax is
# deprecated (BuildKit build check "LegacyKeyValueFormat").
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860

# Every relative path in the instructions that follow resolves against /app.
WORKDIR /app
# System-level dependencies: compiler toolchain (build-essential, cmake,
# pkg-config) plus native libraries (libgomp1, libopenblas-dev, libpq-dev).
# Packages are listed alphabetically, one per line. The apt package index is
# deleted in the same layer so it never lands in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        build-essential \
        cmake \
        libgomp1 \
        libopenblas-dev \
        libpq-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*
# Limit the numeric runtimes to one thread each; intended to improve stability
# with Stan (Prophet) and llama-cpp.
#   OMP_NUM_THREADS       OpenMP thread count
#   MKL_NUM_THREADS       Intel MKL thread count
#   OPENBLAS_NUM_THREADS  OpenBLAS thread count
#   KMP_DUPLICATE_LIB_OK  let the Intel OpenMP runtime tolerate being loaded twice
# Written in key=value form: the space-separated `ENV key value` syntax is
# deprecated (BuildKit build check "LegacyKeyValueFormat").
ENV OMP_NUM_THREADS=1 \
    MKL_NUM_THREADS=1 \
    OPENBLAS_NUM_THREADS=1 \
    KMP_DUPLICATE_LIB_OK=TRUE
# Python dependencies. Only requirements.txt is copied at this point; the rest
# of the source tree is added by a later COPY.
COPY requirements.txt ./
# Bring the packaging tools up to date before resolving the requirements.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel
RUN python -m pip install --no-cache-dir -r requirements.txt
# Install llama-cpp-python (latest release), compiled from source because
# pre-built glibc wheels are not always available.
#
# OOM guard (exit code 137): the build runs through Ninja, which ignores
# MAKEFLAGS, so CMAKE_BUILD_PARALLEL_LEVEL=1 is what limits compilation to a
# single job.
#
# CMAKE_ARGS and CMAKE_BUILD_PARALLEL_LEVEL only matter while this build runs,
# so they are set inline on the RUN command instead of via ENV. That keeps
# build-only settings out of the running container's environment.
RUN CMAKE_ARGS="-DGGML_CPU=ON" \
    CMAKE_BUILD_PARALLEL_LEVEL=1 \
    pip install --no-cache-dir --upgrade llama-cpp-python
# Bake the model into the image at build time so the container does not have to
# fetch it on startup (HF Spaces).
# Model: Gemma 4 E4B instruct, GGUF Q4_K_M file (~2.5GB), stored in ./models
# relative to the working directory.
# The backslash continuations are joined by the Dockerfile parser, so Python
# still receives the whole snippet as a single logical line.
RUN mkdir -p models && \
    python -c "from huggingface_hub import hf_hub_download; \
        hf_hub_download( \
            repo_id='bartowski/google_gemma-4-E4B-it-GGUF', \
            filename='google_gemma-4-E4B-it-Q4_K_M.gguf', \
            local_dir='models')"
# Add the application source after the dependency and model steps, so a code
# change does not invalidate those earlier layers.
COPY . ./
# Document the listening port (informational only; EXPOSE does not publish it).
EXPOSE 7860/tcp
# Launch the app with uvicorn on all interfaces, port 7860.
# Kept at a single worker while startup hangs on HF Spaces are being debugged.
CMD ["uvicorn", "app.main:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--workers", "1"]
|