---
# Deployment configuration for a Docker-based Space.
# One key per line: collapsing this file onto a single line makes it
# unparseable (nested `key: value` pairs inside a plain scalar are an error,
# and everything after the first ` #` is read as a comment).

runtime: docker
sdk: docker
# Quoted so the version stays the string "3.10" instead of the float 3.1.
python_version: "3.10"

# Use custom Dockerfile with pre-cached models
build:
  dockerfile: Dockerfile.hf-spaces
  # Enable Docker layer caching for faster rebuilds
  # NOTE(review): `cache` is assumed to belong under `build` (it describes
  # Docker layer caching) - confirm against the consumer's schema.
  cache: true

# Hardware requirements
# Note: Remove or comment out if t4-medium is unavailable
# You can also use: t4-small, cpu-upgrade, or a100-large
hardware:
  gpu: t4-medium  # 16GB GPU RAM, 16GB System RAM

# Environment variables
# Each entry is a single `NAME=value` string; entries are quoted so that
# characters such as `:` and `$` can never be reinterpreted by a YAML parser.
env:
  # NOTE(review): whether `$SPACE_ID` is expanded depends on the tool that
  # reads this file - confirm it is not passed through literally.
  - "SPACE_ID=$SPACE_ID"
  - "HF_HOME=/app/.cache/huggingface"
  - "TORCH_HOME=/app/.cache/torch"
  - "MODEL_CACHE_DIR=/app/models"
  - "PRELOAD_GGUF=true"
  - "HF_SPACES=true"
  - "CUDA_VISIBLE_DEVICES=0"
  - "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512"
  # Note: TRANSFORMERS_OFFLINE is NOT set - allows runtime model downloads
  # Pre-cached models load instantly, other models download on-demand