###############################################################################
# ml-service — HuggingFace Spaces Docker SDK (Gemma 4 E4B + transformers 5.5)
###############################################################################
FROM python:3.11-slim

# System deps for image processing (PIL / open-clip)
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 && \
    rm -rf /var/lib/apt/lists/*

# HF Spaces runs as uid 1000. Create that user together with the directories it
# must own — /app plus the writable caches for HF model downloads (Gemma 4 E4B,
# FashionSigLIP, etc.) — while they are still empty. Doing the chown here, on
# empty directories, avoids a late `chown -R /app` that would rewrite every
# installed dependency file into an extra image layer.
RUN useradd -m -u 1000 appuser && \
    mkdir -p /app /tmp/hf_cache /tmp/torch_cache && \
    chown appuser:appuser /app /tmp/hf_cache /tmp/torch_cache

WORKDIR /app

# NOTE(review): `:latest` makes the build non-reproducible; consider pinning the
# uv image to the release that generated uv.lock (tag or digest) — confirm the
# required version before changing.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# From here on every file is created as appuser, so /app (including the .venv
# produced by `uv sync`) ends up owned by the runtime user without a fix-up pass.
USER appuser

# Dependency manifests are copied on their own so the install layer stays
# cached until pyproject.toml / uv.lock change.
COPY --chown=appuser:appuser pyproject.toml uv.lock* ./
RUN uv sync --frozen --no-dev

COPY --chown=appuser:appuser config/ config/
COPY --chown=appuser:appuser models/ models/
COPY --chown=appuser:appuser src/ src/

# _shared/ holds copies of monorepo packages/{ssrf-policy,pricing-config}/*.yaml
# staged by .github/workflows/sync-hf-space.yml so the Python `parents[3]`
# walk-up to monorepo root works in this flat container layout. The directory
# is .gitignore'd in monorepo dev (Python falls back to packages/ via
# _shared_paths.py); the sync workflow re-creates it before each upload.
COPY --chown=appuser:appuser _shared/ _shared/

# Runtime configuration. HF_HOME / TORCH_HOME point at the appuser-owned cache
# directories created above.
ENV DEVICE=cpu \
    PORT=7860 \
    HF_HOME=/tmp/hf_cache \
    TORCH_HOME=/tmp/torch_cache \
    TRANSFORMERS_OFFLINE=0

# Production warm-start — preload chat (gemma-4-e4b) + trend_predict (TimesFM 2.5)
# at lifespan boot so first /chat or /api/v1/ml/trends/predict request doesn't pay
# ~30s / ~10s cold-load. Defaults in main.py are lazy (opt-in) because Apple Silicon
# dev hangs when Gemma + FashionSigLIP cohabit MPS unified memory; HF Spaces L40S
# has no such constraint. TimesFM 2.5 also requires .compile(ForecastConfig) which
# runs at first load — preload absorbs that one-time JIT cost.
# Enable the warm-start preloads for the chat and TimesFM models.
ENV ML_PRELOAD_CHAT=1 \
    ML_PRELOAD_TIMESFM=1

# Port the service listens on (documentation only; matches --port below).
EXPOSE 7860

# Drop privileges: the server runs as the non-root appuser account.
USER appuser

# `--no-sync` makes `uv run` use the environment exactly as it was built by
# `uv sync --frozen --no-dev`. Without it, `uv run` re-syncs the project
# environment on every container start, which costs startup time and needs
# network access for anything the build-time sync deliberately left out.
CMD ["uv", "run", "--no-sync", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]