"""Generate an image with the Q8_0 Ideogram 4 DiT.

  python download_deps.py            # one time (gated base-repo access required)
  python usage.py "a poster that says HELLO"

Memory: the FP8 pipeline is large; on a 24 GB card you may need an offload/
sequential-load recipe (see recipe-q8_0.json).
"""
import sys
import torch
from ideogram4 import Ideogram4Pipeline, Ideogram4PipelineConfig
from gguf_loader import load_gguf_tensors, swap_branch

# Path of the GGUF file whose tensors are handed to swap_branch below.
GGUF = "ideogram4-q8_0.gguf"

# The prompt is the first command-line argument; without one, use a demo prompt.
if len(sys.argv) > 1:
    prompt = sys.argv[1]
else:
    prompt = 'a storefront sign that says "FRESH COFFEE"'

# Build the base pipeline from the FP8 weights repo on the GPU in bfloat16.
pipeline_config = Ideogram4PipelineConfig(weights_repo="ideogram-ai/ideogram-4-fp8")
pipe = Ideogram4Pipeline.from_pretrained(
    config=pipeline_config,
    device="cuda",
    dtype=torch.bfloat16,
)

# Load the GGUF tensors once and apply them to both transformer branches.
# swap_branch's return value is only printed here; presumably it reports how
# many linear layers were replaced -- confirm against gguf_loader.
g = load_gguf_tensors(GGUF)
cond_swapped = swap_branch(pipe.conditional_transformer, g, "cond")
uncond_swapped = swap_branch(pipe.unconditional_transformer, g, "uncond")
print("linears swapped:", cond_swapped, uncond_swapped)

# Run the pipeline with a fixed seed and keep the first returned image.
outputs = pipe(prompt, num_steps=48, height=1024, width=1024, seed=1000)
img = outputs[0]
img.save("out.png")
print("saved out.png")