File size: 4,302 Bytes
af75202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be36ee7
 
af75202
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# Optimized Dockerfile for Hugging Face Spaces with T4 GPU
# Pre-downloads models during build to eliminate cold-start delays

FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base

# Build-time only: stops apt/debconf from prompting during the install below.
# Declared as ARG rather than ENV so it is not baked into the environment of
# the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Environment shared by every stage derived from this one
ENV TZ=Etc/UTC \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Install system dependencies (kept alphabetically sorted for clean diffs),
# then point both `python` and `python3` at the 3.10 interpreter.
# The apt package lists are deleted in the same layer so they never persist.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    ffmpeg \
    git \
    poppler-utils \
    python3-pip \
    python3.10 \
    python3.10-dev \
    tesseract-ocr \
    wget \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip tooling. --no-cache-dir keeps pip's download cache out of this
# layer, which would otherwise be inherited by every later stage.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel

# ============================================================================
# Stage: Build and install dependencies
# ============================================================================
FROM base AS builder

# All dependency installation happens relative to /app
WORKDIR /app

# Bring in only the dependency manifest, so this layer and the install below
# stay cached until requirements.txt itself changes
COPY requirements.txt ./requirements.txt

# Install the Python dependencies; pip's download cache is disabled to keep
# the resulting layer small
RUN pip install \
    --no-cache-dir \
    --requirement requirements.txt

# ============================================================================
# Stage: Model preloading
# ============================================================================
FROM builder AS model-cache

# Set persistent cache directories in the image (not /tmp).
# The runtime stage sets the same paths so the copied caches are found there.
ENV HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets

# Create the cache directories up front so they exist (and can be copied into
# the runtime stage) even if the preload script leaves one of them empty
RUN mkdir -p "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR"

# Copy preload script
COPY preload_models.py /app/

# Pre-download all models during build.
# Whatever the script writes under the directories above is captured in this
# image layer; a non-zero exit from the script fails the build.
RUN python3 /app/preload_models.py

# Report what was cached (informational; an empty listing does not fail the build).
# The name tests are grouped so the type filter applies to all three patterns,
# and symlinks are matched as well as regular files.
# NOTE(review): the Hugging Face cache is documented to expose snapshot files
# as symlinks into blobs/ - confirm against the library version in use.
RUN echo "Verifying cached models..." && \
    { du -sh "$HF_HOME" "$MODEL_CACHE_DIR" "$WHISPER_CACHE" || true; } && \
    find "$HF_HOME" \( -type f -o -type l \) \
        \( -name "*.bin" -o -name "*.safetensors" -o -name "*.gguf" \) | head -20

# ============================================================================
# Stage: Final runtime image
# ============================================================================
FROM base AS runtime

# Copy Python packages and their console scripts from the builder stage
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy cached models from model-cache stage.
# Placed before the application code so code changes do not invalidate these layers.
COPY --from=model-cache /app/.cache /app/.cache
COPY --from=model-cache /app/models /app/models

# Set working directory
WORKDIR /app

# Copy application code
COPY . .

# Set environment variables for runtime.
# PYTHONPATH lists both import roots explicitly. The previous value appended
# an undefined build-time $PYTHONPATH, which left a trailing empty entry (the
# current directory) on the path.
# The cache paths match the ones used by the model-cache stage.
# NOTE(review): ${SPACE_ID:-""} is expanded at build time, where SPACE_ID is
# not declared in this file - presumably the platform overrides it at run
# time; confirm.
ENV PYTHONPATH=/app/services/ai-service/src:/app \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets \
    TRANSFORMERS_OFFLINE=0 \
    HF_HUB_OFFLINE=0 \
    CUDA_VISIBLE_DEVICES=0 \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    GGUF_N_THREADS=4 \
    GGUF_N_BATCH=128 \
    GGUF_N_GPU_LAYERS=32 \
    PRELOAD_GGUF=true \
    HF_SPACES=true \
    SPACE_ID=${SPACE_ID:-""} \
    MPLCONFIGDIR=/tmp/matplotlib

# Create runtime directories (for uploads, temp files, etc.).
# Mode 1777 keeps them world-writable while preserving the sticky bit on /tmp,
# which a recursive `chmod 777` would strip.
RUN mkdir -p /tmp/uploads /tmp/matplotlib && \
    chmod 1777 /tmp /tmp/uploads /tmp/matplotlib

# Copy and setup entrypoint script and configuration.
# The explicit copies make the build fail if any of these files is missing
# from the build context.
COPY entrypoint.sh /entrypoint.sh
COPY verify_cache.py /app/verify_cache.py
COPY models_config.json /app/models_config.json
RUN chmod +x /entrypoint.sh

# NOTE(review): no USER is set, so the image defaults to root. Consider
# adding a non-root user once the write requirements of entrypoint.sh and
# the model caches are confirmed.

# Expose port (documentation only; matches the uvicorn port below)
EXPOSE 7860

# Health check: probes the app's /health endpoint, allowing 60s for startup
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Set entrypoint; the CMD below is passed to it as arguments
ENTRYPOINT ["/entrypoint.sh"]

# Start the application
# Use the root app.py which is designed for HF Spaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]