# Source: Hugging Face Space salvinjose/HNTAI (status: Paused) - File size: 4,621 Bytes
# Commits shown in the page's blame gutter: ec7a30f, 5547093, 4156c57, 032e872,
# 569dec6, ba7396d, b0bb219, c7103dc, 232a26e, 5aafb3a

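# Optimized Dockerfile for Hugging Face Spaces with T4 GPU
# Designed to pre-download models during build to eliminate cold-start delays.
# NOTE: the preload step in the model-cache stage is currently commented out,
# so the build itself does not download any models until it is re-enabled.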

FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base

# DEBIAN_FRONTEND is only needed while apt-get runs. Declaring it as a
# stage-scoped ARG keeps package installation non-interactive during the build
# without leaking the setting into derived stages or the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Environment inherited by every stage built FROM base:
#   TZ                       - fixed timezone for the image
#   PYTHONUNBUFFERED         - unbuffered stdout/stderr so logs appear immediately
#   PYTHONDONTWRITEBYTECODE  - do not write .pyc files
ENV TZ=Etc/UTC \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Install system dependencies (sorted alphabetically for easier diffs), point
# `python` and `python3` at the 3.10 interpreter, and drop the apt package
# lists in the same layer so they do not add to the image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    curl \
    ffmpeg \
    git \
    poppler-utils \
    python3-pip \
    python3.10 \
    python3.10-dev \
    tesseract-ocr \
    wget \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip and install setuptools compatible with openai-whisper.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip "setuptools<70.0.0" wheel

# ============================================================================
# Stage: Build and install dependencies
# ============================================================================
FROM base AS builder

WORKDIR /app

# Only the dependency manifest is copied here, so the install layers below are
# rebuilt only when requirements.txt changes.
COPY requirements.txt ./

# These packages are installed first and without build isolation, so their
# builds use the setuptools<70.0.0 already installed in the base stage instead
# of a freshly resolved build environment.
RUN pip install \
        --no-cache-dir \
        --no-build-isolation \
        "numpy<2.0.0" \
        openai-whisper==20231117

# Remaining Python dependencies; the pip cache is disabled to keep the layer small.
RUN pip install --no-cache-dir --requirement requirements.txt

# ============================================================================
# Stage: Model preloading
# ============================================================================
FROM builder AS model-cache

# Set persistent cache directories in the image (not /tmp)
ENV HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets

# Create the cache directories. They must exist even when nothing is
# downloaded, because the runtime stage copies /app/.cache and /app/models
# out of this stage.
RUN mkdir -p "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR"

# Copy preload script
# COPY scripts/preload_models.py /app/

# Pre-download all models during build
# This will cache models in the Docker image layer
# NOTE: the preload step is currently disabled, so the directories created
# above stay empty and the verification below reports no model files.
# RUN python3 /app/preload_models.py

# Report cache sizes and list up to 20 cached model weight files.
# The -name alternatives are grouped with \( ... \) so that `-type f` applies
# to all three patterns; without the grouping it binds only to "*.bin".
# `du` failures are tolerated (|| true) so the report never breaks the build.
RUN echo "Verifying cached models..." && \
    du -sh "$HF_HOME" "$MODEL_CACHE_DIR" "$WHISPER_CACHE" || true && \
    find "$HF_HOME" -type f \( -name "*.bin" -o -name "*.safetensors" -o -name "*.gguf" \) | head -20

# ============================================================================
# Stage: Final runtime image
# ============================================================================
FROM base AS runtime

# Copy Python packages and console scripts installed in the builder stage
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy the cache and model directories created in the model-cache stage
COPY --from=model-cache /app/.cache /app/.cache
COPY --from=model-cache /app/models /app/models

# Set working directory
WORKDIR /app

# Copy application code
COPY . .

# Runtime environment.
#   - Cache locations (HF_HOME, TORCH_HOME, WHISPER_CACHE, MODEL_CACHE_DIR,
#     TRANSFORMERS_CACHE, HF_DATASETS_CACHE) point at the directories copied
#     from the model-cache stage.
#   - TRANSFORMERS_OFFLINE=0 / HF_HUB_OFFLINE=0 leave offline mode disabled.
#   - OMP/MKL/NUMEXPR thread counts are set to 4.
#   - GGUF_*, PRELOAD_GGUF and HF_SPACES are application settings; their exact
#     meaning is defined by the app code, which is not visible here.
#   - MPLCONFIGDIR points matplotlib's config dir at /tmp/matplotlib, created below.
# NOTE(review): $PYTHONPATH is expanded at build time; no visible earlier
#   instruction sets it, so unless the base image defines it the resulting
#   value ends with a trailing ':'.
# NOTE(review): ${SPACE_ID:-""} is also expanded at build time. No ARG or ENV
#   named SPACE_ID is declared in the visible stages, so this bakes in an empty
#   default; presumably the hosting platform supplies the real value at run
#   time - confirm.
ENV PYTHONPATH=/app/services/ai-service/src:$PYTHONPATH \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets \
    TRANSFORMERS_OFFLINE=0 \
    HF_HUB_OFFLINE=0 \
    CUDA_VISIBLE_DEVICES=0 \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    GGUF_N_THREADS=4 \
    GGUF_N_BATCH=128 \
    GGUF_N_GPU_LAYERS=32 \
    PRELOAD_GGUF=true \
    HF_SPACES=true \
    SPACE_ID=${SPACE_ID:-""} \
    MPLCONFIGDIR=/tmp/matplotlib

# Create runtime directories (for uploads, temp files, etc.).
# /tmp keeps the conventional 1777 mode (world-writable with the sticky bit)
# rather than a recursive 777, which would clear the sticky bit; only the two
# directories created here are opened up for any user to write.
RUN mkdir -p /tmp/uploads /tmp/matplotlib && \
    chmod 1777 /tmp && \
    chmod 777 /tmp/uploads /tmp/matplotlib

# Copy and setup entrypoint script and configuration
COPY entrypoint.sh /entrypoint.sh
COPY scripts/verify_cache.py /app/verify_cache.py
COPY models_config.json /app/models_config.json
RUN chmod +x /entrypoint.sh

# Expose port
EXPOSE 7860

# Health check: probes /health on the exposed port using curl, which is
# installed in the base stage.
# NOTE(review): assumes the app serves a /health route on 7860 - confirm.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Set entrypoint. The CMD below is passed to this script as its arguments.
# NOTE(review): entrypoint.sh is not visible here; it presumably ends with
# `exec "$@"` so the server receives signals directly - confirm.
ENTRYPOINT ["/entrypoint.sh"]

# Start the application
# Use the root app.py which is designed for HF Spaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]