# Source: Hugging Face Space salvinjose/HNTAI (status: Paused)
# File size: 4,302 bytes

# Optimized Dockerfile for Hugging Face Spaces with T4 GPU
# Pre-downloads models during build to eliminate cold-start delays

FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base

# Build-time only: stops apt from prompting during package installation.
# Declared as ARG rather than ENV so it is visible to the RUN steps of this
# stage but is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Environment inherited by every stage built FROM base
ENV TZ=Etc/UTC \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Install system dependencies (kept in alphabetical order for easy diffing),
# point `python` and `python3` at Python 3.10, and drop the apt package lists
# in the same layer so they do not persist in the image
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    ffmpeg \
    git \
    poppler-utils \
    python3-pip \
    python3.10 \
    python3.10-dev \
    tesseract-ocr \
    wget \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip tooling; --no-cache-dir keeps pip's download cache out of this
# layer, which the runtime stage inherits
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel

# ============================================================================
# Stage: Build and install dependencies
# ============================================================================
FROM base AS builder

WORKDIR /app

# Bring in only the dependency manifest, ahead of any application code
COPY requirements.txt ./

# Install the Python dependencies listed in the manifest;
# --no-cache-dir keeps pip's download cache out of the layer
RUN pip install --no-cache-dir --requirement requirements.txt

# ============================================================================
# Stage: Model preloading
# ============================================================================
FROM builder AS model-cache

# Set persistent cache directories in the image (not /tmp)
ENV HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets

# Create cache directories up front so the later du/find and the runtime
# stage's COPY --from=model-cache always have something to read
RUN mkdir -p "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR"

# Copy preload script
COPY preload_models.py /app/

# Pre-download all models during build
# This will cache models in the Docker image layer
RUN python3 /app/preload_models.py

# Verify models were cached (informational build-log output only).
# The -name tests are grouped in \( ... \) so that -type f applies to all three
# patterns; ungrouped, -o binds looser than the implicit AND and -type f would
# only constrain "*.bin".
RUN echo "Verifying cached models..." && \
    (du -sh "$HF_HOME" "$MODEL_CACHE_DIR" "$WHISPER_CACHE" || true) && \
    find "$HF_HOME" -type f \( -name "*.bin" -o -name "*.safetensors" -o -name "*.gguf" \) | head -20

# ============================================================================
# Stage: Final runtime image
# ============================================================================
FROM base AS runtime

# Copy Python packages and installed executables from the builder stage
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy cached models from model-cache stage
COPY --from=model-cache /app/.cache /app/.cache
COPY --from=model-cache /app/models /app/models

# Set working directory
WORKDIR /app

# Copy application code
COPY . .

# Set environment variables for runtime.
# The cache paths repeat the values used in the model-cache stage because this
# stage is built FROM base and therefore does not inherit that stage's ENV.
# NOTE(review): $PYTHONPATH is not set anywhere earlier in this file, so the
# expansion below presumably leaves a trailing ':' — confirm whether the
# resulting empty path entry is relied upon before removing it.
# NOTE(review): SPACE_ID is not declared as ARG/ENV in this file, so
# ${SPACE_ID:-""} resolves to its default at build time; presumably the
# hosting platform injects the real value at container start — confirm.
ENV PYTHONPATH=/app/services/ai-service/src:$PYTHONPATH \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets \
    TRANSFORMERS_OFFLINE=0 \
    HF_HUB_OFFLINE=0 \
    CUDA_VISIBLE_DEVICES=0 \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    GGUF_N_THREADS=4 \
    GGUF_N_BATCH=128 \
    GGUF_N_GPU_LAYERS=32 \
    PRELOAD_GGUF=true \
    HF_SPACES=true \
    SPACE_ID=${SPACE_ID:-""} \
    MPLCONFIGDIR=/tmp/matplotlib

# Create runtime directories (for uploads, temp files, etc.).
# Mode 1777 keeps them writable by any user while retaining the sticky bit, so
# users can only remove their own files. A recursive `chmod 777` on /tmp would
# clear that bit and make every existing entry world-writable.
RUN mkdir -p /tmp/uploads /tmp/matplotlib && \
    chmod 1777 /tmp /tmp/uploads /tmp/matplotlib

# Copy and setup entrypoint script and configuration
COPY entrypoint.sh /entrypoint.sh
COPY verify_cache.py /app/verify_cache.py
COPY models_config.json /app/models_config.json
RUN chmod +x /entrypoint.sh

# Expose port (documentation only; matches the uvicorn --port below)
EXPOSE 7860

# Health check: probe the HTTP /health endpoint on the exposed port using the
# curl binary installed in the base stage
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Set entrypoint; the CMD below is passed to it as arguments
ENTRYPOINT ["/entrypoint.sh"]

# Start the application
# Use the root app.py which is designed for HF Spaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]