---
# Lightweight UNet configuration.
#
# NOTE(review): this file was previously collapsed onto a single line that
# began with `#`, so YAML parsers read the whole thing as one comment and
# loaded an empty document. The structure below is reconstructed from the
# key order; confirm the nesting against the code that consumes it.
model:
  in_channels: 4  # number of latent-space channels
  out_channels: 4
  base_channels: 64
  # Original note: "4 downsampling steps". One multiplier is listed per
  # level -- TODO confirm how many downsampling steps the model derives
  # from a 4-entry list.
  channel_mults: [1, 2, 4, 8]
  num_res_blocks: 2
  attention_resolutions: [8]  # attention applied only at the lowest resolution
  dropout: 0.0
  use_checkpoint: true
  num_heads: 4

  # Text conditioning
  context_dim: 768  # CLIP text-encoder embedding dimension
  use_linear_projection: true

  # Timestep embedding
  time_embed_dim: 256

  # Optimization settings
  # NOTE(review): assumed to belong under `model` because no separate
  # section key precedes them -- verify against the config loader.
  use_flash_attention: false  # not supported on P4; option kept anyway
  # NOTE(review): `use_checkpoint` above looks related -- confirm which of
  # the two flags the consumer actually reads.
  gradient_checkpointing: true