File size: 4,621 Bytes
ec7a30f
 
5547093
ec7a30f
4156c57
ec7a30f
 
 
 
 
4156c57
ec7a30f
 
 
 
 
032e872
 
ec7a30f
 
 
 
 
 
 
 
 
4156c57
569dec6
 
4156c57
ec7a30f
 
 
 
4156c57
ec7a30f
4156c57
ec7a30f
 
4156c57
ba7396d
 
b0bb219
ba7396d
ec7a30f
 
 
4156c57
ec7a30f
 
 
 
4156c57
ec7a30f
 
 
 
 
 
 
4156c57
ec7a30f
 
4156c57
ec7a30f
c7103dc
4156c57
ec7a30f
 
c7103dc
4156c57
ec7a30f
 
 
 
4156c57
ec7a30f
 
 
 
4156c57
ec7a30f
 
 
4156c57
ec7a30f
 
 
4156c57
ec7a30f
232a26e
 
ec7a30f
4156c57
 
ec7a30f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4156c57
5547093
ec7a30f
 
 
 
 
5547093
ec7a30f
5aafb3a
 
ec7a30f
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# Optimized Dockerfile for Hugging Face Spaces with T4 GPU
# Designed to pre-download models during build to eliminate cold-start delays
# (note: the preload step in the model-cache stage is currently commented out)

FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base

# Build-time only: keeps apt/debconf from prompting during package installation
# without persisting DEBIAN_FRONTEND into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Environment inherited by every stage built on top of `base`.
ENV TZ=Etc/UTC \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Install system dependencies (kept alphabetically sorted for easier diffs).
# `python` and `python3` are both pointed at python3.10, and the apt lists are
# removed in the same layer so they do not add to the image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    curl \
    ffmpeg \
    git \
    poppler-utils \
    python3-pip \
    python3.10 \
    python3.10-dev \
    tesseract-ocr \
    wget \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip and install setuptools compatible with openai-whisper.
# --no-cache-dir matches the other pip invocations in this file and keeps
# pip's download cache out of the layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip "setuptools<70.0.0" wheel

# ============================================================================
# Stage: Build and install dependencies
# ============================================================================
FROM base AS builder

WORKDIR /app

# Bring in just the dependency manifest.
COPY requirements.txt .

# numpy and openai-whisper are installed first with build isolation turned off,
# so their build picks up the setuptools<70.0.0 already installed in the image
# instead of a freshly resolved one.
RUN pip install \
        --no-cache-dir \
        --no-build-isolation \
        "numpy<2.0.0" \
        openai-whisper==20231117

# Remaining Python dependencies; --no-cache-dir keeps the image smaller.
RUN pip install \
        --no-cache-dir \
        --requirement requirements.txt

# ============================================================================
# Stage: Model preloading
# ============================================================================
FROM builder AS model-cache

# Set persistent cache directories in the image (not /tmp)
ENV HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets

# Create the cache roots up front. With preloading disabled (see below) this is
# what guarantees /app/.cache and /app/models exist for stages that copy them.
RUN mkdir -p "$HF_HOME" "$TORCH_HOME" "$WHISPER_CACHE" "$MODEL_CACHE_DIR"

# Model preloading is currently disabled. Uncomment BOTH instructions below to
# download the models during build and cache them in this image layer.
# COPY scripts/preload_models.py /app/
# RUN python3 /app/preload_models.py

# Report what ended up in the caches (informational; `du` is best-effort).
# The -name alternatives are grouped in \( ... \) so that `-type f` applies to
# all three patterns; without the grouping it only constrained "*.bin".
RUN echo "Verifying cached models..." && \
    du -sh "$HF_HOME" "$MODEL_CACHE_DIR" "$WHISPER_CACHE" || true && \
    find "$HF_HOME" -type f \( -name "*.bin" -o -name "*.safetensors" -o -name "*.gguf" \) | head -20

# ============================================================================
# Stage: Final runtime image
# ============================================================================
FROM base AS runtime

# Copy Python packages and their console scripts from the builder stage
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy cached models from model-cache stage.
# Hugging Face Docker Spaces run the container as UID 1000, and this image
# allows online downloads (HF_HUB_OFFLINE=0), so the cache trees are handed to
# that UID; root-owned caches would make runtime downloads fail with
# permission errors. Running as root is unaffected by this ownership.
COPY --from=model-cache --chown=1000:1000 /app/.cache /app/.cache
COPY --from=model-cache --chown=1000:1000 /app/models /app/models

# Set working directory
WORKDIR /app

# Copy application code
COPY . .

# Set environment variables for runtime:
#   - cache locations mirror the ones used in the model-cache stage
#   - *_OFFLINE=0 leaves Hugging Face hub access enabled
#   - thread counts, GGUF settings and CUDA allocator config are fixed defaults
#   - MPLCONFIGDIR points at a directory created below under /tmp
ENV PYTHONPATH=/app/services/ai-service/src:$PYTHONPATH \
    HF_HOME=/app/.cache/huggingface \
    TORCH_HOME=/app/.cache/torch \
    WHISPER_CACHE=/app/.cache/whisper \
    MODEL_CACHE_DIR=/app/models \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/app/.cache/huggingface/datasets \
    TRANSFORMERS_OFFLINE=0 \
    HF_HUB_OFFLINE=0 \
    CUDA_VISIBLE_DEVICES=0 \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    NUMEXPR_NUM_THREADS=4 \
    GGUF_N_THREADS=4 \
    GGUF_N_BATCH=128 \
    GGUF_N_GPU_LAYERS=32 \
    PRELOAD_GGUF=true \
    HF_SPACES=true \
    SPACE_ID=${SPACE_ID:-""} \
    MPLCONFIGDIR=/tmp/matplotlib

# Create runtime directories (for uploads, temp files, etc.).
# Mode 1777 keeps them world-writable while preserving the sticky bit, so one
# user cannot delete or rename another user's files. A recursive `chmod 777`
# on /tmp would strip the sticky bit that /tmp normally carries.
RUN mkdir -p /tmp/uploads /tmp/matplotlib && \
    chmod 1777 /tmp /tmp/uploads /tmp/matplotlib

# Copy and setup entrypoint script and configuration
COPY entrypoint.sh /entrypoint.sh
COPY scripts/verify_cache.py /app/verify_cache.py
COPY models_config.json /app/models_config.json
RUN chmod +x /entrypoint.sh

# Expose port (documentation only; matches the uvicorn --port below)
EXPOSE 7860

# Health check: probes the /health endpoint on the service port with curl
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Set entrypoint; the CMD below is passed to it as arguments
ENTRYPOINT ["/entrypoint.sh"]

# Start the application
# Use the root app.py which is designed for HF Spaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]