###############################################################################
# ml-service — HuggingFace Spaces Docker SDK (Gemma 4 E4B + transformers 5.5)
###############################################################################

FROM python:3.11-slim

# Native shared libraries for the image-processing stack (PIL / open-clip).
# The apt package index is removed in the same layer so it never ships in
# the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# HF Spaces runs the container as uid 1000, so create a matching account
# (with a home directory) for the service to run under.
RUN useradd --create-home --uid 1000 appuser

WORKDIR /app

# uv binary, copied from the official distroless uv image.
# NOTE(review): `:latest` makes builds non-reproducible; pin a specific uv
# version (or digest) once one is confirmed compatible with the uv.lock format.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Dependency layer: only the manifests are copied first, so this layer stays
# cached until pyproject.toml / uv.lock change.
#  * --no-cache keeps uv's download/build cache (otherwise left under
#    /root/.cache/uv and unused at runtime) out of the image.
#  * /app (including the freshly created .venv) is handed to appuser in the
#    SAME layer that creates it. A recursive chown in a later layer would copy
#    every file of the virtualenv into a second layer and roughly double the
#    image size contributed by the dependencies.
COPY --chown=appuser:appuser pyproject.toml uv.lock* ./
RUN uv sync --frozen --no-dev --no-cache && \
    chown -R appuser:appuser /app

# Application code and assets, owned by appuser at copy time so that no
# follow-up chown layer is needed.
COPY --chown=appuser:appuser config/ config/
COPY --chown=appuser:appuser models/ models/
COPY --chown=appuser:appuser src/ src/
# _shared/ holds copies of monorepo packages/{ssrf-policy,pricing-config}/*.yaml
# staged by .github/workflows/sync-hf-space.yml so the Python `parents[3]`
# walk-up to monorepo root works in this flat container layout. The directory
# is .gitignore'd in monorepo dev (Python falls back to packages/ via
# _shared_paths.py); the sync workflow re-creates it before each upload.
COPY --chown=appuser:appuser _shared/ _shared/

# Writable cache for HF model downloads (Gemma 4 E4B, FashionSigLIP, etc.).
# /app is already owned by appuser (see above), so only the cache directories
# need their ownership changed here.
RUN mkdir -p /tmp/hf_cache /tmp/torch_cache && \
    chown -R appuser:appuser /tmp/hf_cache /tmp/torch_cache

# Runtime configuration: compute device, listening port, and the cache
# locations under /tmp used for model downloads.
ENV DEVICE=cpu \
    PORT=7860 \
    HF_HOME=/tmp/hf_cache \
    TORCH_HOME=/tmp/torch_cache \
    TRANSFORMERS_OFFLINE=0

# Production warm-start: preload chat (gemma-4-e4b) and trend_predict
# (TimesFM 2.5) at lifespan boot, so the first /chat or
# /api/v1/ml/trends/predict request does not pay the ~30s / ~10s cold-load.
# main.py defaults to lazy loading (preload is opt-in) because Apple Silicon
# dev hangs when Gemma + FashionSigLIP cohabit MPS unified memory; HF Spaces
# L40S has no such constraint. TimesFM 2.5 also requires
# .compile(ForecastConfig), which runs at first load; preloading absorbs that
# one-time JIT cost.
ENV ML_PRELOAD_CHAT=1 \
    ML_PRELOAD_TIMESFM=1

# Port the service listens on (documentation only; must match --port below).
EXPOSE 7860

# Drop root privileges for the running service.
USER appuser

# Start the API server from the virtualenv built at image-build time.
# --no-sync stops `uv run` from re-locking / re-syncing the environment on
# every container start. Without it, uv would also try to install the dev
# dependency group that the build deliberately excluded (--no-dev), which
# delays boot and requires network access to the package index.
CMD ["uv", "run", "--no-sync", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]