# Lightweight UNet configuration.
#
# Every hyperparameter lives under the single `model` mapping. The notes below
# restate the original author's inline comments in English; anything marked
# NOTE(review) is an assumption that must be confirmed against the code that
# loads this file.
model:
  in_channels: 4  # number of latent-space channels
  out_channels: 4
  base_channels: 64
  # Four multipliers, one per resolution level. The original note reads
  # "4 downsamplings".
  # NOTE(review): four levels usually imply three downsampling steps between
  # them; confirm the actual count against the model code.
  channel_mults: [1, 2, 4, 8]
  num_res_blocks: 2
  attention_resolutions: [8]  # attention only at the lowest resolution
  dropout: 0.0
  # NOTE(review): `gradient_checkpointing` below looks like an overlapping
  # switch; confirm which of the two keys the consumer actually reads.
  use_checkpoint: true
  num_heads: 4

  # Text conditioning
  context_dim: 768  # CLIP text-encoding dimension
  use_linear_projection: true

  # Timestep embedding
  time_embed_dim: 256

  # Optimization settings
  # Not supported on P4 (presumably the target GPU; confirm); the option is
  # kept so it can be turned on elsewhere.
  use_flash_attention: false
  gradient_checkpointing: true