Spaces:

salvinjose
/

HNTAI

Paused

HNTAI / Dockerfile.hf-spaces

Refactor Docker configurations to use `uvicorn` as the entry point for FastAPI applications. Update `.huggingface.yaml` to remove legacy app configuration and clarify hardware requirements. Modify `Dockerfile.prod` to install `uvicorn` and adjust the command for production deployment.

be36ee7 8 months ago

Raw

History Blame

4.3 kB

	# Optimized Dockerfile for Hugging Face Spaces with T4 GPU
	# Pre-downloads models during build to eliminate cold-start delays

	FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base

	# Build-time only: stops apt/debconf from prompting during package
	# installation. Declared as ARG (not ENV) so the setting is not baked into
	# the environment of containers started from this image.
	ARG DEBIAN_FRONTEND=noninteractive

	# Environment shared by every stage that derives from this one:
	#   TZ                       - fixed UTC timezone
	#   PYTHONUNBUFFERED         - flush stdout/stderr immediately (live logs)
	#   PYTHONDONTWRITEBYTECODE  - do not write .pyc files
	ENV TZ=Etc/UTC \
	    PYTHONUNBUFFERED=1 \
	    PYTHONDONTWRITEBYTECODE=1

	# Install system dependencies (alphabetical for easy diffing), point the
	# `python` and `python3` commands at Python 3.10, and drop the apt package
	# lists in the same layer so they never reach the image.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	        curl \
	        ffmpeg \
	        git \
	        poppler-utils \
	        python3-pip \
	        python3.10 \
	        python3.10-dev \
	        tesseract-ocr \
	        wget \
	    && ln -sf /usr/bin/python3.10 /usr/bin/python \
	    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
	    && rm -rf /var/lib/apt/lists/*

	# Upgrade the packaging toolchain; --no-cache-dir keeps pip's download cache
	# out of the image layer.
	RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel

	# ============================================================================
	# Stage: Build and install dependencies
	# ============================================================================
	FROM base AS builder

	WORKDIR /app

	# Only the dependency manifest is copied in at this point, so this layer and
	# the install below are rebuilt only when requirements.txt itself changes.
	COPY requirements.txt ./

	# Install the Python dependencies listed in the manifest.
	# pip's download cache is disabled to keep the layer small.
	RUN pip install --no-cache-dir --requirement requirements.txt

	# ============================================================================
	# Stage: Model preloading
	# ============================================================================
	FROM builder AS model-cache

	# Cache locations live under /app (not /tmp) so the files downloaded during
	# the build end up in image layers that the runtime stage can copy.
	ENV HF_HOME=/app/.cache/huggingface \
	    TORCH_HOME=/app/.cache/torch \
	    WHISPER_CACHE=/app/.cache/whisper \
	    MODEL_CACHE_DIR=/app/models \
	    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
	    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets

	# Create the cache directories up front. This also guarantees /app/models
	# exists even if the preload script writes nothing there.
	RUN mkdir -p "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR"

	# Copy preload script
	COPY preload_models.py /app/

	# Pre-download all models during build so they are stored in this image layer
	RUN python3 /app/preload_models.py

	# Report what was cached (informational):
	#   - `du` prints the size of each cache root; `|| true` keeps the build going
	#     if one of them cannot be read.
	#   - `find` lists up to 20 weight files. The -name alternatives are grouped
	#     in parentheses so that `-type f` applies to every pattern, not just the
	#     first one.
	RUN echo "Verifying cached models..." && \
	    { du -sh "$HF_HOME" "$MODEL_CACHE_DIR" "$WHISPER_CACHE" || true; } && \
	    find "$HF_HOME" -type f \( -name "*.bin" -o -name "*.safetensors" -o -name "*.gguf" \) | head -20

	# ============================================================================
	# Stage: Final runtime image
	# ============================================================================
	FROM base AS runtime

	# NOTE(review): this stage declares no USER, so the image defaults to root.
	# Hugging Face Spaces reportedly runs Docker containers as UID 1000 - confirm,
	# and consider creating that user and using COPY --chown for /app.

	# Copy installed Python packages and their console scripts from the builder
	COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
	COPY --from=builder /usr/local/bin /usr/local/bin

	# Copy the models downloaded at build time from the model-cache stage
	COPY --from=model-cache /app/.cache /app/.cache
	COPY --from=model-cache /app/models /app/models

	# Set working directory
	WORKDIR /app

	# Copy application code
	COPY . .

	# Runtime environment:
	#   - cache paths mirror the model-cache stage so the copied models are found
	#   - *_OFFLINE=0 leaves Hugging Face Hub network access enabled
	#   - thread-count and GGUF_* values are tuning knobs read by the application
	#     (presumably sized for the T4 hardware named in the file header - verify)
	#   - SPACE_ID resolves at build time; it is empty unless supplied to the build
	ENV PYTHONPATH=/app/services/ai-service/src:$PYTHONPATH \
	    HF_HOME=/app/.cache/huggingface \
	    TORCH_HOME=/app/.cache/torch \
	    WHISPER_CACHE=/app/.cache/whisper \
	    MODEL_CACHE_DIR=/app/models \
	    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
	    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets \
	    TRANSFORMERS_OFFLINE=0 \
	    HF_HUB_OFFLINE=0 \
	    CUDA_VISIBLE_DEVICES=0 \
	    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
	    OMP_NUM_THREADS=4 \
	    MKL_NUM_THREADS=4 \
	    NUMEXPR_NUM_THREADS=4 \
	    GGUF_N_THREADS=4 \
	    GGUF_N_BATCH=128 \
	    GGUF_N_GPU_LAYERS=32 \
	    PRELOAD_GGUF=true \
	    HF_SPACES=true \
	    SPACE_ID=${SPACE_ID:-""} \
	    MPLCONFIGDIR=/tmp/matplotlib

	# Create world-writable runtime directories (uploads, matplotlib config).
	# /tmp is set to 1777 rather than 777 so it keeps its sticky bit; a plain
	# `chmod -R 777 /tmp` would strip it.
	RUN mkdir -p /tmp/uploads /tmp/matplotlib && \
	    chmod 1777 /tmp && \
	    chmod 0777 /tmp/uploads /tmp/matplotlib

	# Copy and setup entrypoint script and configuration
	COPY entrypoint.sh /entrypoint.sh
	COPY verify_cache.py /app/verify_cache.py
	COPY models_config.json /app/models_config.json
	RUN chmod +x /entrypoint.sh

	# Port the application listens on (see CMD below)
	EXPOSE 7860

	# Health check: mark the container unhealthy when /health does not answer
	# with a successful HTTP status. `|| exit 1` normalises curl's various
	# failure codes to the single value Docker expects.
	HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
	    CMD curl -f http://localhost:7860/health || exit 1

	# Set entrypoint; the CMD below is passed to this script as its arguments
	# (assumes entrypoint.sh finishes with `exec "$@"` - TODO confirm).
	ENTRYPOINT ["/entrypoint.sh"]

	# Start the application
	# Use the root app.py which is designed for HF Spaces
	CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]