Spaces:
Paused
Paused
Refactor Docker configurations to use `uvicorn` as the entry point for FastAPI applications. Update `.huggingface.yaml` to remove legacy app configuration and clarify hardware requirements. Modify `Dockerfile.prod` to install `uvicorn` and adjust the command for production deployment.
be36ee7 | # Optimized Dockerfile for Hugging Face Spaces with T4 GPU | |
| # Pre-downloads models during build to eliminate cold-start delays | |
# Base stage: CUDA 12.1 base image on Ubuntu 22.04 with Python 3.10 and the
# system tools listed below. The builder and runtime stages both start here,
# so everything installed in this stage is also present in the final image.
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base

# Build-time only: stops apt from prompting during package configuration.
# Declared as ARG (not ENV) so it is not left set in the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Set environment variables
#   PYTHONUNBUFFERED        - flush stdout/stderr immediately so logs appear live
#   PYTHONDONTWRITEBYTECODE - do not write .pyc files
ENV TZ=Etc/UTC \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Install system dependencies (one per line, alphabetical, for clean diffs).
# `python` and `python3` are both pointed at python3.10, and the apt package
# lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        git \
        poppler-utils \
        python3-pip \
        python3.10 \
        python3.10-dev \
        tesseract-ocr \
        wget \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip
# --no-cache-dir keeps pip's download cache out of this layer; the layer is
# inherited by the runtime stage, so any cache left here would ship with it.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel
| # ============================================================================ | |
| # Stage: Build and install dependencies | |
| # ============================================================================ | |
# Builder stage: installs the Python dependencies listed in requirements.txt
# on top of the shared base stage.
FROM base AS builder

WORKDIR /app

# Only the dependency manifest is copied here, so this layer and the install
# below are rebuilt only when requirements.txt itself changes.
COPY requirements.txt ./

# Install Python dependencies
# --no-cache-dir keeps pip's download cache out of the layer to reduce size
RUN python3 -m pip install --no-cache-dir --requirement requirements.txt
| # ============================================================================ | |
| # Stage: Model preloading | |
| # ============================================================================ | |
# Model-cache stage: runs preload_models.py at build time so whatever it
# downloads lands in /app/.cache and /app/models, which the runtime stage
# copies out of this stage.
FROM builder AS model-cache

# Set persistent cache directories in the image (not /tmp)
ENV HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets

# Create cache directories
# Creating them up front guarantees /app/.cache and /app/models exist for the
# runtime stage's COPY --from, even if the preload script writes nothing there.
RUN mkdir -p "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR"

# Copy preload script
COPY preload_models.py /app/

# Pre-download all models during build
# This will cache models in the Docker image layer
RUN python3 /app/preload_models.py

# Verify models were cached (informational build-log output only).
# - `du` is wrapped in a subshell with `|| true` so a missing directory does
#   not abort the build.
# - The -name tests are grouped with \( \) so `-type f` applies to all three
#   patterns; ungrouped, -o binds looser than the implicit AND and `-type f`
#   would only constrain the first pattern.
# - The pipeline's exit status is that of `head`, so this step cannot fail
#   the build when no model files are found.
RUN echo "Verifying cached models..." && \
    (du -sh "$HF_HOME" "$MODEL_CACHE_DIR" "$WHISPER_CACHE" || true) && \
    find "$HF_HOME" -type f \( -name "*.bin" -o -name "*.safetensors" -o -name "*.gguf" \) | head -20
| # ============================================================================ | |
| # Stage: Final runtime image | |
| # ============================================================================ | |
# Runtime stage: the image that actually runs. Starts from the shared base and
# pulls in the installed Python packages and the pre-downloaded model caches
# from the earlier stages, then adds the application code.
FROM base AS runtime

# Hugging Face Spaces runs Docker containers as UID 1000, so create that user
# and make /app its property up front; later COPYs into /app keep this
# ownership on the directory itself.
#
# Runtime scratch directories (uploads, matplotlib config) stay world-writable
# as before, but with the sticky bit (1777): a plain `chmod -R 777 /tmp` would
# clear the sticky bit on /tmp and let any user delete other users' files.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /app /tmp/uploads /tmp/matplotlib \
    && chown 1000:1000 /app \
    && chmod 1777 /tmp /tmp/uploads /tmp/matplotlib

# Copy Python packages from builder
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy cached models from model-cache stage
# Owned by the runtime user so the libraries can still write to the caches
# (lock files, extra downloads) - offline mode is disabled below.
COPY --from=model-cache --chown=1000:1000 /app/.cache /app/.cache
COPY --from=model-cache --chown=1000:1000 /app/models /app/models

# Set working directory
WORKDIR /app

# Copy application code
COPY --chown=1000:1000 . .

# Set environment variables for runtime
# PYTHONPATH lists /app explicitly. The previous value appended $PYTHONPATH,
# which is undefined at build time and left a trailing ':' (an implicit
# "current directory" entry).
# SPACE_ID: no SPACE_ID build argument is declared in this stage, so the
# expression below resolves to its empty default at build time.
ENV PYTHONPATH=/app/services/ai-service/src:/app \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets \
    TRANSFORMERS_OFFLINE=0 \
    HF_HUB_OFFLINE=0 \
    CUDA_VISIBLE_DEVICES=0 \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    GGUF_N_THREADS=4 \
    GGUF_N_BATCH=128 \
    GGUF_N_GPU_LAYERS=32 \
    PRELOAD_GGUF=true \
    HF_SPACES=true \
    SPACE_ID=${SPACE_ID:-""} \
    MPLCONFIGDIR=/tmp/matplotlib

# Copy and setup entrypoint script and configuration
COPY entrypoint.sh /entrypoint.sh
COPY --chown=1000:1000 verify_cache.py /app/verify_cache.py
COPY --chown=1000:1000 models_config.json /app/models_config.json
RUN chmod +x /entrypoint.sh

# Drop root now that every step needing it has run.
USER user

# Expose port
EXPOSE 7860

# Health check
# Probes the app's /health endpoint; allows 60s of start-up before failures
# count, then marks the container unhealthy after 3 consecutive failures.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Set entrypoint
# NOTE(review): CMD below is passed to this script as its arguments; the
# script is presumably expected to finish with `exec "$@"` - confirm.
ENTRYPOINT ["/entrypoint.sh"]

# Start the application
# Use the root app.py which is designed for HF Spaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]