---
# Both the runtime and the SDK are set to Docker.
runtime: docker
sdk: docker
# Quoted so the version stays the string "3.10" instead of the float 3.1.
python_version: "3.10"

# Image build settings: use the custom Dockerfile that ships
# pre-cached models.
build:
  dockerfile: Dockerfile.hf-spaces
  # Enable Docker layer caching so rebuilds are faster
  cache: true

# Hardware requirements
# Note: remove or comment out this section if t4-medium is unavailable.
# Other tiers you can use instead: t4-small, cpu-upgrade, or a100-large.
hardware:
  # 16 GB GPU RAM. System RAM for this tier is listed as 30 GB in the
  # Spaces hardware table, not 16 GB - TODO confirm against current docs.
  gpu: t4-medium

# Environment variables, one KEY=value string per list entry.
# Entries are double-quoted so YAML never re-types or splits a value
# (for example on the ":" inside PYTORCH_CUDA_ALLOC_CONF).
env:
  # NOTE(review): whether $SPACE_ID is expanded or passed through
  # literally depends on the consumer of this file - confirm.
  - "SPACE_ID=$SPACE_ID"
  # Cache and model directories under /app
  - "HF_HOME=/app/.cache/huggingface"
  - "TORCH_HOME=/app/.cache/torch"
  - "MODEL_CACHE_DIR=/app/models"
  - "PRELOAD_GGUF=true"
  - "HF_SPACES=true"
  # CUDA device visibility and PyTorch CUDA allocator settings
  - "CUDA_VISIBLE_DEVICES=0"
  - "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512"
  # TRANSFORMERS_OFFLINE is deliberately NOT set, so models can still be
  # downloaded at runtime: pre-cached models load instantly, and any
  # other model is downloaded on demand.