# HNTAI / Dockerfile.hf-spaces (4.3 kB)
# Last commit be36ee7 (sachinchandrankallar):
#   Refactor Docker configurations to use `uvicorn` as the entry point for
#   FastAPI applications. Update `.huggingface.yaml` to remove the legacy app
#   configuration and clarify hardware requirements. Modify `Dockerfile.prod`
#   to install `uvicorn` and adjust the command for production deployment.
# Optimized Dockerfile for Hugging Face Spaces with T4 GPU
# Pre-downloads models during build to eliminate cold-start delays
#
# Stage `base`: CUDA base image plus the OS packages and Python 3.10 toolchain
# that every later stage (builder, model-cache, runtime) starts from.
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base

# Build-time only: keeps apt/debconf from prompting during the RUN steps of
# this stage. Declared as ARG (not ENV) so it is not baked into the
# environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Environment that should persist into every derived stage and the container:
#   TZ                       - fixed timezone
#   PYTHONUNBUFFERED         - unbuffered stdout/stderr so logs appear immediately
#   PYTHONDONTWRITEBYTECODE  - do not write .pyc files
ENV TZ=Etc/UTC \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Install system dependencies (sorted alphabetically for diffability).
# update + install + list cleanup stay in ONE layer so the apt index is never
# stale and never shipped in the image. The two symlinks make both `python`
# and `python3` resolve to the 3.10 interpreter.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        git \
        poppler-utils \
        python3-pip \
        python3.10 \
        python3.10-dev \
        tesseract-ocr \
        wget \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*

# Upgrade the packaging toolchain; --no-cache-dir keeps pip's download cache
# out of this layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel
# ============================================================================
# Stage `builder`: Python dependencies installed on top of `base`
# ============================================================================
FROM base AS builder

WORKDIR /app

# Bring in only the dependency manifest at this point, so the install layer
# below is reused from cache until requirements.txt itself changes.
COPY requirements.txt ./

# Install the Python dependencies.
# --no-cache-dir: do not keep pip's download cache in the layer (smaller image).
RUN pip install --no-cache-dir --requirement requirements.txt
# ============================================================================
# Stage `model-cache`: download model weights at build time
# ============================================================================
# Builds on `builder` so the preload script runs with the installed Python
# dependencies. The final image copies /app/.cache and /app/models out of this
# stage.
FROM builder AS model-cache

# Set persistent cache directories in the image (not /tmp)
ENV HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets

# Create the cache directories up front so they exist even if the preload
# script leaves one of them empty (a later stage copies them with COPY --from).
RUN mkdir -p "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR"

# Copy preload script
COPY preload_models.py /app/

# Pre-download all models during build so they are stored in this image layer.
# NOTE(review): preload_models.py is not visible here; it is presumed to write
# into the cache directories configured above - confirm.
RUN python3 /app/preload_models.py

# Report what was cached (informational: sizes plus the first 20 weight files).
# - `du` may fail on a missing path, hence `|| true`.
# - The -name tests are grouped in \( ... \) so `-type f` applies to all three
#   patterns; without the grouping it only constrained "*.bin".
# - $MODEL_CACHE_DIR is searched as well, since it is also copied into the
#   final image.
RUN echo "Verifying cached models..." && \
    (du -sh "$HF_HOME" "$MODEL_CACHE_DIR" "$WHISPER_CACHE" || true) && \
    find "$HF_HOME" "$MODEL_CACHE_DIR" -type f \
        \( -name "*.bin" -o -name "*.safetensors" -o -name "*.gguf" \) | head -20
# ============================================================================
# Stage `runtime`: final image
# ============================================================================
# Starts again from `base` and copies in only the installed Python packages,
# the pre-downloaded model caches and the application code.
FROM base AS runtime

# Hugging Face Docker Spaces run the container as UID 1000, so create that
# user explicitly and make the locations the app writes to usable by it:
#   - /app is owned by the user (model caches live under it and
#     HF_HUB_OFFLINE=0 / TRANSFORMERS_OFFLINE=0 below allow the libraries to
#     write to those caches at runtime);
#   - /tmp, /tmp/uploads and /tmp/matplotlib are world-writable WITH the
#     sticky bit (1777). This replaces `chmod -R 777 /tmp`, which is broader
#     than needed and drops the sticky bit from /tmp.
# Done before any COPY of application code so this layer stays cached when
# only the source changes.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /app /tmp/uploads /tmp/matplotlib \
    && chown user:user /app /tmp/uploads /tmp/matplotlib \
    && chmod 1777 /tmp /tmp/uploads /tmp/matplotlib

# Copy Python packages (and their console scripts, e.g. uvicorn) from builder
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy cached models from the model-cache stage. --chown sets ownership during
# the copy; a follow-up `RUN chown -R` would duplicate these large files in an
# extra layer.
COPY --from=model-cache --chown=user:user /app/.cache /app/.cache
COPY --from=model-cache --chown=user:user /app/models /app/models

# Set working directory
WORKDIR /app

# Copy application code
COPY --chown=user:user . .

# Set environment variables for runtime.
# The cache paths repeat the model-cache stage's values because this stage
# derives from `base` and does not inherit that stage's ENV.
# NOTE(review): no earlier instruction in this file sets PYTHONPATH, so unless
# the base image defines it the value ends with a trailing ':' - confirm that
# is intended.
# NOTE(review): there is no `ARG SPACE_ID` in this file, so the SPACE_ID
# default below is what gets stored at build time; a value supplied to the
# container at run time takes precedence.
ENV PYTHONPATH=/app/services/ai-service/src:$PYTHONPATH \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets \
    TRANSFORMERS_OFFLINE=0 \
    HF_HUB_OFFLINE=0 \
    CUDA_VISIBLE_DEVICES=0 \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    GGUF_N_THREADS=4 \
    GGUF_N_BATCH=128 \
    GGUF_N_GPU_LAYERS=32 \
    PRELOAD_GGUF=true \
    HF_SPACES=true \
    SPACE_ID=${SPACE_ID:-""} \
    MPLCONFIGDIR=/tmp/matplotlib

# Copy and set up the entrypoint script and configuration.
# The chmod runs while the build user is still root.
COPY entrypoint.sh /entrypoint.sh
COPY --chown=user:user verify_cache.py /app/verify_cache.py
COPY --chown=user:user models_config.json /app/models_config.json
RUN chmod +x /entrypoint.sh

# Expose port (documentation only; 7860 is the port the server binds below)
EXPOSE 7860

# Health check: allow 60s for startup, then probe /health every 30s
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Drop root for the running container; matches the UID Spaces uses.
USER user

# Set entrypoint
# NOTE(review): entrypoint.sh is not visible here; it should finish with
# `exec "$@"` so the CMD below becomes PID 1 and receives signals - confirm.
ENTRYPOINT ["/entrypoint.sh"]

# Start the application
# Use the root app.py which is designed for HF Spaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]