sachinchandrankallar committed on
Commit
3ccf7f0
·
1 Parent(s): e944b46

`Optimized Dockerfile and .dockerignore for Hugging Face Spaces`

Browse files
Files changed (2) hide show
  1. .dockerignore +88 -76
  2. Dockerfile +41 -47
.dockerignore CHANGED
@@ -1,85 +1,97 @@
1
- # Git and version control
2
- .git
3
- .gitignore
4
- .gitattributes
5
 
6
- # Python
7
- __pycache__/
8
- *.py[cod]
9
- *$py.class
10
- *.so
11
- *.egg
12
- *.egg-info/
13
- dist/
14
- build/
15
- *.whl
16
 
17
- # Virtual environments
18
- .env
19
- .venv
20
- venv/
21
- ENV/
22
- env/
23
- .ENV/
24
 
25
- # IDE and editor files
26
- .vscode/
27
- .idea/
28
- *.swp
29
- *.swo
30
- *~
31
 
32
- # OS files
33
- .DS_Store
34
- .DS_Store?
35
- ._*
36
- .Spotlight-V100
37
- .Trashes
38
- ehthumbs.db
39
- Thumbs.db
 
 
 
 
 
 
40
 
41
- # Logs and temporary files
42
- *.log
43
- *.tmp
44
- *.temp
45
- *.out
46
- docker_build.log
 
47
 
48
- # Documentation (if not needed in container)
49
- *.md
50
- !README.md
51
- docs/
52
- *.ipynb
 
 
 
 
 
 
 
 
 
53
 
54
- # Large model and data files
55
- *.pt
56
- *.pth
57
- *.ckpt
58
- *.h5
59
- *.onnx
60
- *.npz
61
- *.npy
62
- *.tar.gz
63
- *.zip
64
- *.tar
65
- *.gz
66
- *.bz2
67
- *.7z
68
- *.rar
69
- datasets/
70
- models/
71
- outputs/
72
- uploads/
73
- *.wav
74
- *.mp4
75
- *.mp3
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- # Node modules (if any)
78
- node_modules/
 
79
 
80
- # Testing and development
81
- .pytest_cache/
82
- .coverage
83
- htmlcov/
84
- .tox/
85
- .cache
 
1
+ # Build context optimization for Hugging Face Spaces
2
+ # Exclude everything by default, then include only what's needed
 
 
3
 
4
+ # Exclude all files and directories first
5
+ *
 
 
 
 
 
 
 
 
6
 
7
+ # Include only essential files for the application
8
+ !requirements.txt
9
+ !README.md
10
+ !ai_med_extract.py
 
 
 
11
 
12
+ # Include source code (but not cache files)
13
+ !services/ai-service/src/ai_med_extract/
14
+ !services/ai-service/src/app.py
15
+ !services/ai-service/src/config_settings.py
16
+ !services/ai-service/src/gradio_app.py
17
+ !services/ai-service/src/wsgi.py
18
 
19
+ # Exclude Python cache and build artifacts
20
+ **/__pycache__/
21
+ **/*.py[cod]
22
+ **/*$py.class
23
+ **/*.so
24
+ **/*.egg
25
+ **/*.egg-info/
26
+ **/dist/
27
+ **/build/
28
+ **/*.whl
29
+ **/.pytest_cache/
30
+ **/htmlcov/
31
+ **/.tox/
32
+ **/.coverage
33
 
34
+ # Exclude virtual environments and local setup
35
+ **/.env*
36
+ **/venv/
37
+ **/.venv/
38
+ **/ENV/
39
+ **/env/
40
+ **/.ENV/
41
 
42
+ # Exclude IDE and development files
43
+ **/.vscode/
44
+ **/.idea/
45
+ **/*.swp
46
+ **/*.swo
47
+ **/*~
48
+ **/.DS_Store*
49
+
50
+ # Exclude logs and temporary files
51
+ **/*.log
52
+ **/*.tmp
53
+ **/*.temp
54
+ **/*.out
55
+ **/docker_build.log
56
 
57
+ # Exclude large media and model files
58
+ **/*.pt
59
+ **/*.pth
60
+ **/*.ckpt
61
+ **/*.h5
62
+ **/*.onnx
63
+ **/*.npz
64
+ **/*.npy
65
+ **/*.tar.gz
66
+ **/*.zip
67
+ **/*.tar
68
+ **/*.gz
69
+ **/*.bz2
70
+ **/*.7z
71
+ **/*.rar
72
+ **/*.wav
73
+ **/*.mp4
74
+ **/*.mp3
75
+ **/datasets/
76
+ **/models/
77
+ **/outputs/
78
+ **/uploads/
79
+
80
+ # Exclude git and version-control metadata
81
+ **/.git/
82
+ **/.gitignore
83
+ **/.gitattributes
84
+
85
+ # Exclude documentation (except README)
86
+ **/*.md
87
+ !README.md
88
+ **/docs/
89
+ **/*.ipynb
90
 
91
+ # Exclude node modules and other package managers
92
+ **/node_modules/
93
+ **/package*.json
94
 
95
+ # Exclude testing and development artifacts
96
+ **/.cache/
97
+ **/node_modules/
 
 
 
Dockerfile CHANGED
@@ -128,65 +128,59 @@
128
  # CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "8", "--timeout", "0", "ai_med_extract.app:app"]
129
 
130
 
131
- # Optimized Dockerfile for Hugging Face Spaces
132
- # Single-stage build for better cache performance
133
 
134
  FROM python:3.10-slim
135
 
136
- ARG DEBIAN_FRONTEND=noninteractive
137
- ENV TZ=Etc/UTC
138
-
139
- # Install system dependencies in a single layer
140
- RUN apt-get update && apt-get install -y --no-install-recommends \
141
- tzdata \
142
- tesseract-ocr \
143
- poppler-utils \
144
- ffmpeg \
145
- && ln -fs /usr/share/zoneinfo/$TZ /etc/localtime \
146
  && dpkg-reconfigure -f noninteractive tzdata \
147
  && rm -rf /var/lib/apt/lists/* \
148
- && apt-get clean
 
149
 
150
- # Set working directory
151
  WORKDIR /app
152
 
153
- # Copy requirements first for better Docker layer caching
154
  COPY requirements.txt .
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- # Install Python dependencies
157
- RUN pip install --no-cache-dir --upgrade pip && \
158
- pip install --no-cache-dir --prefer-binary -r requirements.txt
159
 
160
- # Copy application code
161
- COPY . .
162
 
163
- # Runtime cache dirs (kept in /tmp, auto-cleared on restart)
164
- ENV HF_HOME=/tmp/huggingface \
165
- XDG_CACHE_HOME=/tmp \
166
- TORCH_HOME=/tmp/torch \
167
- WHISPER_CACHE=/tmp/whisper \
168
- PYTHONUNBUFFERED=1 \
169
- PYTHONPATH=/app \
170
- GGUF_N_THREADS=2 \
171
- GGUF_N_BATCH=64 \
172
- OMP_NUM_THREADS=2 \
173
- MKL_NUM_THREADS=2 \
174
- NUMEXPR_NUM_THREADS=2
175
-
176
- # Ensure writable directories exist
177
- RUN mkdir -p /tmp/uploads /tmp/huggingface /tmp/torch /tmp/whisper && \
178
- chmod -R 777 /tmp
179
-
180
- # Add entrypoint script that clears cache/models before app starts
181
- RUN echo '#!/bin/bash\n\
182
- echo "[ENTRYPOINT] Clearing Hugging Face / Torch / tmp cache..."\n\
183
- rm -rf /tmp/* ~/.cache/huggingface ~/.cache/torch || true\n\
184
- mkdir -p /tmp/uploads /tmp/huggingface /tmp/torch /tmp/whisper\n\
185
- chmod -R 777 /tmp\n\
186
- exec "$@"' > /entrypoint.sh && chmod +x /entrypoint.sh
187
-
188
- ENTRYPOINT ["/entrypoint.sh"]
189
 
190
  EXPOSE 7860
191
 
192
- CMD ["uvicorn", "ai_med_extract.app:create_app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
 
 
128
  # CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "8", "--timeout", "0", "ai_med_extract.app:app"]
129
 
130
 
131
+ # Ultra-minimal Dockerfile optimized for Hugging Face Spaces
132
+ # Based on Spaces best practices for maximum cache efficiency
133
 
134
  FROM python:3.10-slim
135
 
136
+ # Minimize layers and maximize cache hits
137
+ RUN apt-get update \
138
+ && apt-get install -y --no-install-recommends \
139
+ tzdata \
140
+ tesseract-ocr \
141
+ poppler-utils \
142
+ ffmpeg \
143
+ && ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \
 
 
144
  && dpkg-reconfigure -f noninteractive tzdata \
145
  && rm -rf /var/lib/apt/lists/* \
146
+ && apt-get clean \
147
+ && rm -rf /tmp/*
148
 
 
149
  WORKDIR /app
150
 
151
+ # Copy requirements.txt before the application code so the dependency-install layer stays cached until requirements change
152
  COPY requirements.txt .
153
+ RUN pip install --no-cache-dir --upgrade pip \
154
+ && pip install --no-cache-dir -r requirements.txt
155
+
156
+ # Copy application code - only essential files
157
+ COPY ai_med_extract.py .
158
+ COPY services/ai-service/src/ai_med_extract ./ai_med_extract/
159
+ COPY services/ai-service/src/app.py ./ai_med_extract/
160
+ COPY services/ai-service/src/config_settings.py ./ai_med_extract/
161
+ COPY services/ai-service/src/gradio_app.py ./ai_med_extract/
162
+ COPY services/ai-service/src/wsgi.py ./ai_med_extract/
163
+
164
+ # Essential environment variables for Spaces compatibility
165
+ ENV PYTHONUNBUFFERED=1 \
166
+ PYTHONPATH=/app \
167
+ HF_HOME=/tmp \
168
+ XDG_CACHE_HOME=/tmp \
169
+ TORCH_HOME=/tmp \
170
+ WHISPER_CACHE=/tmp \
171
+ FAST_MODE=true \
172
+ PRELOAD_SMALL_MODELS=false
173
 
174
+ # Create the upload and cache directories under /tmp and make /tmp world-writable (chmod 777)
175
+ RUN mkdir -p /tmp/uploads /tmp/huggingface /tmp/torch /tmp/whisper \
176
+ && chmod -R 777 /tmp
177
 
178
+ # Simple entrypoint - minimal script to avoid cache issues
179
+ RUN echo '#!/bin/bash\nrm -rf /tmp/* ~/.cache/* || true\nmkdir -p /tmp/uploads /tmp/huggingface /tmp/torch /tmp/whisper\nchmod -R 777 /tmp\nexec "$@"' > /start.sh && chmod +x /start.sh
180
 
181
+ ENTRYPOINT ["/start.sh"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  EXPOSE 7860
184
 
185
+ # Use uvicorn to serve the FastAPI app directly
186
+ CMD ["uvicorn", "ai_med_extract.app:app", "--host", "0.0.0.0", "--port", "7860"]