# Container image for the uvicorn-served application (app.main:app).
#
# Layer order is deliberate: system packages, Python dependencies, the
# llama-cpp-python source build and the model download all come BEFORE the
# application source is copied, so editing application code does not
# invalidate those layers.

# Use an official Python runtime as a parent image.
FROM python:3.11-slim

# Python runtime behaviour and the port the application listens on.
# PYTHONDONTWRITEBYTECODE: do not write .pyc files.
# PYTHONUNBUFFERED: unbuffered stdout/stderr so logs appear immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860

# Set the working directory in the container.
WORKDIR /app

# Install system dependencies (compiler toolchain, CMake and the libraries
# needed by the Python packages built below), then drop the apt package
# lists in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        libpq-dev \
        cmake \
        pkg-config \
        libgomp1 \
        libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*

# Pin the numeric thread pools to a single thread for better stability with
# Stan (Prophet) and llama-cpp.
ENV OMP_NUM_THREADS=1 \
    MKL_NUM_THREADS=1 \
    OPENBLAS_NUM_THREADS=1 \
    KMP_DUPLICATE_LIB_OK=TRUE

# Install Python dependencies. Only requirements.txt is copied at this point
# so this layer is rebuilt only when the dependency list changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
RUN pip install --no-cache-dir -r requirements.txt

# Install llama-cpp-python (latest).
# We compile from source because pre-built glibc wheels aren't always
# available.
# CRITICAL FIX for OOM (exit code 137): llama-cpp-python uses Ninja, which
# ignores MAKEFLAGS. We MUST set CMAKE_BUILD_PARALLEL_LEVEL=1 to limit the
# build to a single job.
ENV CMAKE_ARGS="-DGGML_CPU=ON" \
    CMAKE_BUILD_PARALLEL_LEVEL="1"
RUN pip install --no-cache-dir --upgrade llama-cpp-python

# Pre-download the model into the image for instant startup on HF Spaces.
# Using Gemma 4 E4B (Instruct-GGUF) - ~2.5GB model file, stored in
# /app/models.
RUN mkdir -p models && \
    python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='bartowski/google_gemma-4-E4B-it-GGUF', filename='google_gemma-4-E4B-it-Q4_K_M.gguf', local_dir='models')"

# Copy the rest of the application code.
COPY . .

# Expose the port the app runs on (same value as PORT above).
EXPOSE 7860

# Run the application using uvicorn with a single worker.
# Reverted to 1 worker for debugging startup hangs on HF Spaces.
# Exec form is used so uvicorn runs as PID 1 and receives signals directly;
# as a consequence the port is spelled out literally rather than read from
# $PORT (exec form performs no shell variable expansion).
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]