# Optimized Dockerfile for Hugging Face Spaces with T4 GPU
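# Designed to pre-download models during build to eliminate cold-start delays.
# NOTE: the preload step in the model-cache stage is currently commented out,
# so no models are baked into the image until it is re-enabled.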
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base

# Build-time only: keeps apt-get from prompting during the build.
# Declared as ARG rather than ENV so it is not baked into the runtime
# environment of images derived from this stage.
ARG DEBIAN_FRONTEND=noninteractive

# Environment shared by every stage that derives from base:
#   TZ                      - container time zone
#   PYTHONUNBUFFERED        - stdout/stderr are not buffered
#   PYTHONDONTWRITEBYTECODE - do not write .pyc files
ENV TZ=Etc/UTC \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Install system dependencies (sorted alphabetically for easier diffs).
# update + install + list cleanup stay in one layer so the apt index is
# never stale and never shipped in the image.
# python and python3 are both pointed at python3.10.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        ffmpeg \
        git \
        poppler-utils \
        python3-pip \
        python3.10 \
        python3.10-dev \
        tesseract-ocr \
        wget \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip and install a setuptools version compatible with openai-whisper.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip "setuptools<70.0.0" wheel
# ============================================================================
# Stage: Build and install dependencies
# ============================================================================
FROM base AS builder

WORKDIR /app

# Install the packages that have build-isolation problems first.
# --no-build-isolation makes them build against the already-installed
# setuptools<70.0.0 instead of a fresh isolated build environment.
# This step does not read requirements.txt, so it runs BEFORE the COPY below:
# editing requirements.txt no longer invalidates this layer.
RUN pip install --no-cache-dir --no-build-isolation \
        "numpy<2.0.0" \
        openai-whisper==20231117

# Copy only the dependency manifest so the layer below is rebuilt only when
# the manifest itself changes.
COPY requirements.txt .

# Install the remaining Python dependencies.
# --no-cache-dir reduces image size.
RUN pip install --no-cache-dir -r requirements.txt
# ============================================================================
# Stage: Model preloading
# ============================================================================
FROM builder AS model-cache

# Keep every model/cache location under /app (not /tmp) so the runtime stage
# can pick them up with COPY --from=model-cache.
ENV HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets

# Create the cache directories up front so they exist (and can be copied by
# later stages) even when nothing has been downloaded.
RUN mkdir -p "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR"

# Model preloading is currently DISABLED. Re-enable the two lines below to
# download models at build time and cache them in this image layer.
# COPY scripts/preload_models.py /app/
# RUN python3 /app/preload_models.py

# Report what was cached. This step is informational and never fails the build:
#   - `du ... || true` tolerates du errors;
#   - the pipeline's exit status is that of `head`.
# The -name tests are grouped with \( ... \) so that `-type f` applies to every
# pattern; without the grouping, `-o` binds looser than the implicit AND and
# `-type f` would only constrain the first pattern.
# All cache directories are searched, not just $HF_HOME.
# "*.pt" is included for openai-whisper checkpoints - TODO confirm the
# preload script stores them under one of these directories.
RUN echo "Verifying cached models..." && \
    du -sh "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR" || true && \
    find "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR" -type f \
        \( -name "*.bin" -o -name "*.safetensors" -o -name "*.gguf" -o -name "*.pt" \) \
        | head -20
# ============================================================================
# Stage: Final runtime image
# ============================================================================
FROM base AS runtime

# Optional build argument; defaults to empty. It replaces the original
# `${SPACE_ID:-""}` expansion, which referenced a variable that was never
# declared at build time and therefore always produced an empty value.
# NOTE(review): the hosting platform is expected to inject the real SPACE_ID
# at container start, overriding this default - confirm.
ARG SPACE_ID=""

# Create an unprivileged user with UID 1000 and prepare writable directories.
# NOTE(review): Hugging Face Docker Spaces are documented to run the container
# as UID 1000; the ownership set here and on the COPY lines below keeps /app
# and its caches writable for that user - confirm against the Spaces docs.
# /tmp keeps its sticky bit (1777) instead of the former `chmod -R 777`, which
# cleared it and let any user delete other users' temp files.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /app /tmp/uploads /tmp/matplotlib \
    && chown 1000:1000 /app \
    && chmod 1777 /tmp /tmp/uploads /tmp/matplotlib

# Copy Python packages and console scripts installed in the builder stage
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy cached models from the model-cache stage, owned by the runtime user so
# additional downloads at runtime can write into the same directories
COPY --chown=1000:1000 --from=model-cache /app/.cache /app/.cache
COPY --chown=1000:1000 --from=model-cache /app/models /app/models

# Set working directory
WORKDIR /app

# Runtime environment.
# PYTHONPATH: the original value appended an undefined $PYTHONPATH, which left
# a trailing ':' (an empty path entry). /app is listed explicitly instead so
# that root-level modules such as app.py stay importable.
# The cache variables mirror the model-cache stage so lookups hit the copied
# directories. The *_OFFLINE=0 flags leave network downloads enabled.
ENV PYTHONPATH=/app/services/ai-service/src:/app \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets \
    TRANSFORMERS_OFFLINE=0 \
    HF_HUB_OFFLINE=0 \
    CUDA_VISIBLE_DEVICES=0 \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    GGUF_N_THREADS=4 \
    GGUF_N_BATCH=128 \
    GGUF_N_GPU_LAYERS=32 \
    PRELOAD_GGUF=true \
    HF_SPACES=true \
    SPACE_ID=${SPACE_ID} \
    MPLCONFIGDIR=/tmp/matplotlib

# Copy application code (use a .dockerignore to keep .git, caches and secrets
# out of the build context)
COPY --chown=1000:1000 . .

# Copy and set up the entrypoint script and configuration.
# chmod 0755 guarantees the script is readable and executable by the
# unprivileged user regardless of its mode in the build context.
COPY entrypoint.sh /entrypoint.sh
COPY --chown=1000:1000 scripts/verify_cache.py /app/verify_cache.py
COPY --chown=1000:1000 models_config.json /app/models_config.json
RUN chmod 0755 /entrypoint.sh

# Expose port (documentation only; does not publish the port)
EXPOSE 7860

# Health check against the application's /health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Drop root privileges for the running container
USER user

# Set entrypoint
# NOTE(review): entrypoint.sh is not visible here; it should end with
# `exec "$@"` so the CMD below becomes PID 1 and receives signals - confirm.
ENTRYPOINT ["/entrypoint.sh"]

# Start the application
# Use the root app.py which is designed for HF Spaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]