# Optimized Dockerfile for Hugging Face Spaces with T4 GPU.
# Models are downloaded during the build (model-cache stage) and copied into
# the runtime image, so the container does not download them on cold start.

# ============================================================================
# Stage: Base OS, Python 3.10 and system tools (parent of every other stage)
# ============================================================================
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base

# Build-time only: keeps apt non-interactive without leaking the setting into
# the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

ENV TZ=Etc/UTC \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Install system dependencies (sorted for diffability) and make `python` /
# `python3` resolve to Python 3.10. The apt lists are removed in the same
# layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        git \
        poppler-utils \
        python3-pip \
        python3.10 \
        python3.10-dev \
        tesseract-ocr \
        wget \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*

# Upgrade packaging tools; --no-cache-dir keeps pip's download cache out of
# this shared layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel

# ============================================================================
# Stage: Build and install dependencies
# ============================================================================
FROM base AS builder

WORKDIR /app

# Copy only the requirements file so this layer stays cached until the
# dependency list changes.
COPY requirements.txt .

# Install Python dependencies (--no-cache-dir reduces image size).
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# ============================================================================
# Stage: Model preloading
# ============================================================================
FROM builder AS model-cache

# Cache locations live under /app (not /tmp) so they can be copied into the
# runtime stage. The runtime stage sets the same values.
ENV HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets

# Create the cache directories up front so the later COPY --from succeeds
# even if the preload script leaves one of them empty.
RUN mkdir -p "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR"

# Copy preload script
COPY scripts/preload_models.py /app/

# Pre-download all models during build; the downloads are stored in this
# image layer.
RUN python3 /app/preload_models.py

# Informational listing of what was cached. This step does not fail the build
# when nothing matches. The -name tests are grouped so that `-type f` applies
# to every pattern, not just "*.bin".
RUN echo "Verifying cached models..." \
    && { du -sh "$HF_HOME" "$MODEL_CACHE_DIR" "$WHISPER_CACHE" || true; } \
    && find "$HF_HOME" "$MODEL_CACHE_DIR" "$WHISPER_CACHE" -type f \
        \( -name "*.bin" -o -name "*.safetensors" -o -name "*.gguf" \) \
        | head -n 20

# ============================================================================
# Stage: Final runtime image
# ============================================================================
FROM base AS runtime

# Copy Python packages and their console scripts from builder
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy cached models from the model-cache stage.
# NOTE(review): ownership is set to 1000:1000 on the assumption that HF Docker
# Spaces run the container as UID 1000, so cache lock/metadata files stay
# writable. Root can still read and write them. Confirm against the
# deployment target.
COPY --from=model-cache --chown=1000:1000 /app/.cache /app/.cache
COPY --from=model-cache --chown=1000:1000 /app/models /app/models

# Set working directory
WORKDIR /app

# Copy application code.
# NOTE(review): this copies the whole build context; make sure .dockerignore
# excludes local .cache/ and models/ directories so they cannot overwrite the
# preloaded ones above.
COPY . .

# Runtime environment.
# PYTHONPATH lists /app explicitly: the previous value appended an undefined
# $PYTHONPATH, which left a dangling ':' (an empty search-path entry).
ENV PYTHONPATH=/app/services/ai-service/src:/app \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets \
    TRANSFORMERS_OFFLINE=0 \
    HF_HUB_OFFLINE=0 \
    CUDA_VISIBLE_DEVICES=0 \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    GGUF_N_THREADS=4 \
    GGUF_N_BATCH=128 \
    GGUF_N_GPU_LAYERS=32 \
    PRELOAD_GGUF=true \
    HF_SPACES=true \
    SPACE_ID=${SPACE_ID:-""} \
    MPLCONFIGDIR=/tmp/matplotlib

# Create world-writable runtime directories (uploads, matplotlib config).
# Mode 1777 keeps the sticky bit on /tmp, which a recursive `chmod 777`
# would strip.
RUN mkdir -p /tmp/uploads /tmp/matplotlib \
    && chmod 1777 /tmp /tmp/uploads /tmp/matplotlib

# Copy and set up the entrypoint script and configuration
COPY entrypoint.sh /entrypoint.sh
COPY scripts/verify_cache.py /app/verify_cache.py
COPY models_config.json /app/models_config.json
RUN chmod +x /entrypoint.sh

# NOTE(review): no USER is set, so the image defaults to root. Switching to an
# unprivileged user needs a check that entrypoint.sh does not require root.

# Port served by uvicorn (see CMD below)
EXPOSE 7860

# Health check: 60 s start period, then poll /health every 30 s
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Set entrypoint; the CMD below is passed to it as arguments
ENTRYPOINT ["/entrypoint.sh"]

# Start the application
# Use the root app.py which is designed for HF Spaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]