import os
# The Boogu transformer/pipeline select their attention + norm kernels based on
# this env var at construction time, so it must be set before importing torch.
# setdefault() only fills the variable in when it is absent, so a value already
# exported by the hosting environment is left untouched.
os.environ.setdefault("device", "cuda:0")
# Use the pure-torch RMSNorm path (not the triton fused kernel) so the block
# parameter layout matches the AoTI graph compiled in the companion Space.
# NOTE(review): `_triton_available` is a private module flag of
# boogu.utils.import_utils; presumably it is consulted when the model classes
# are built, which is why it is overridden before the pipeline imports — confirm.
import boogu.utils.import_utils as _import_utils
_import_utils._triton_available = False
import base64
import csv
import io
import json
import sys
# Example caching writes the cached output (which embeds the base64 before/after
# data URIs) through the csv module; bump the field limit so large frames don't
# trip "_csv.Error: field larger than field limit".
# The limit is stored in a C long, so sys.maxsize raises OverflowError on
# platforms where long is 32-bit (e.g. Windows). Halve the request until the csv
# module accepts it; the loop always terminates because small values are valid.
_csv_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_csv_field_limit)
    except OverflowError:
        _csv_field_limit //= 2
    else:
        break
import spaces
import torch
import gradio as gr
from PIL import Image
from boogu.pipelines.boogu.pipeline_boogu import BooguImagePipeline
from boogu.pipelines.boogu.pipeline_boogu_turbo import BooguImageTurboPipeline
# Repo ids for the Edit checkpoint, the Turbo checkpoint, and the AoTI-compiled
# Edit blocks.
MODEL_ID = "Boogu/Boogu-Image-0.1-Edit"
TURBO_ID = "Boogu/Boogu-Image-0.1-Turbo"
AOTI_REPO = "multimodalart/Boogu-Image-0.1-Edit-aoti"
# Optional Turbo AoTI repo used to patch the Turbo single-stream blocks; it is
# read from the TURBO_AOTI_REPO environment variable, and an unset or empty
# value collapses to None (= eager). Flip between "...-Turbo-aoti" (default
# compile) and "...-Turbo-aoti-mat" (max_autotune) to A/B the compiled variants.
# Leave it unset to keep the eager 3.3s baseline.
TURBO_AOTI_REPO = os.getenv("TURBO_AOTI_REPO") or None
# Load the Edit pipeline from MODEL_ID in bfloat16 with trust_remote_code
# enabled, then move it onto the GPU.
_edit_load_kwargs = {"torch_dtype": torch.bfloat16, "trust_remote_code": True}
pipe = BooguImagePipeline.from_pretrained(MODEL_ID, **_edit_load_kwargs)
pipe.to("cuda")
# Turbo shares the (byte-identical) mllm / vae / processor / scheduler with Edit;
# only the transformer differs. So only the Turbo transformer is loaded, using
# the same transformer class as the Edit pipeline, and the Turbo pipeline is
# assembled from the already-resident components — no duplicate 17.5GB mllm.
_transformer_cls = type(pipe.transformer)
turbo_transformer = _transformer_cls.from_pretrained(
    TURBO_ID,
    subfolder="transformer",
    torch_dtype=torch.bfloat16,
)
# Copy the Edit component mapping, overriding just the transformer entry.
_turbo_components = dict(pipe.components, transformer=turbo_transformer)
turbo_pipe = BooguImageTurboPipeline(**_turbo_components)
# The instruction rewriter and its processor are carried over by attribute
# after construction, in the same order as before.
for _shared_attr in ("text_instruction_rewriter", "instruction_rewriter_processor"):
    setattr(turbo_pipe, _shared_attr, getattr(pipe, _shared_attr))
turbo_pipe.to("cuda")
# Swap the 24 repeated single-stream blocks of the Edit transformer for their
# AoTI-compiled graph (one shared compiled graph, per-block weights). Any
# failure — import, download, or load — is reported and the blocks stay eager.
# The Turbo transformer is not touched here.
try:
    from pathlib import Path

    from huggingface_hub import snapshot_download
    from spaces.zero.torch.aoti import aoti_load_from_module_dir

    _block_dir = Path(snapshot_download(AOTI_REPO)) / "BooguImageTransformerBlock"
    if not (_block_dir / "package.pt2").exists():
        # Snapshot downloaded but carries no compiled package: keep eager blocks.
        print("AoTI: Edit package.pt2 not found, running eager")
    else:
        aoti_load_from_module_dir(pipe.transformer.single_stream_layers, _block_dir)
        print(f"AoTI: patched {len(pipe.transformer.single_stream_layers)} Edit single-stream blocks")
except Exception as exc:  # noqa: BLE001
    # Deliberate best-effort: the app must still start without the compiled graph.
    print(f"AoTI (Edit) load failed ({exc!r}); running eager")
# Optionally patch the Turbo single-stream blocks too. Nothing happens unless
# TURBO_AOTI_REPO is set (off by default = eager baseline); any failure is
# reported and the Turbo blocks stay eager.
if TURBO_AOTI_REPO:
    try:
        from pathlib import Path

        from huggingface_hub import snapshot_download
        from spaces.zero.torch.aoti import aoti_load_from_module_dir

        _t_dir = Path(snapshot_download(TURBO_AOTI_REPO)) / "BooguImageTransformerBlock"
        if not (_t_dir / "package.pt2").exists():
            print(f"AoTI: Turbo package.pt2 not found in {TURBO_AOTI_REPO}, running eager")
        else:
            aoti_load_from_module_dir(turbo_pipe.transformer.single_stream_layers, _t_dir)
            print(f"AoTI: patched {len(turbo_pipe.transformer.single_stream_layers)} Turbo blocks from {TURBO_AOTI_REPO}")
    except Exception as exc:  # noqa: BLE001
        # Deliberate best-effort: fall back to the eager Turbo transformer.
        print(f"AoTI (Turbo) load failed ({exc!r}); running eager")
# EXPERIMENT (#10): optionally patch the 2 Turbo double-stream blocks with a second
# AoTI graph, enabled only when the DS_TURBO_AOTI_REPO environment variable is set.
# WARNING: that graph bakes the captured per-sample seq lengths as constants (the
# block takes them as python int lists, not dynamic tensors), so it is only
# correct for prompts whose instruction tokenizes to the captured length.
DS_TURBO_AOTI_REPO = os.getenv("DS_TURBO_AOTI_REPO") or None
if DS_TURBO_AOTI_REPO:
    try:
        from pathlib import Path

        from huggingface_hub import snapshot_download
        from spaces.zero.torch.aoti import aoti_load_from_module_dir

        _ds_dir = Path(snapshot_download(DS_TURBO_AOTI_REPO)) / "BooguImageDoubleStreamTransformerBlock"
        if not (_ds_dir / "package.pt2").exists():
            print(f"AoTI: Turbo double-stream package.pt2 not found in {DS_TURBO_AOTI_REPO}, running eager")
        else:
            aoti_load_from_module_dir(turbo_pipe.transformer.double_stream_layers, _ds_dir)
            print(f"AoTI: patched {len(turbo_pipe.transformer.double_stream_layers)} Turbo double-stream blocks from {DS_TURBO_AOTI_REPO}")
    except Exception as exc:  # noqa: BLE001
        # Deliberate best-effort: keep the eager double-stream blocks on failure.
        print(f"AoTI (Turbo double-stream) load failed ({exc!r}); running eager")
# Largest value of a signed 32-bit integer (2147483647).
MAX_SEED = (1 << 31) - 1
def _data_uri(img):
    """Return *img* encoded as a base64 WebP ``data:`` URI.

    *img* must provide ``save(fp, format=..., quality=...)``; it is written to
    an in-memory buffer as WEBP at quality 92 and the bytes are base64-encoded.
    """
    with io.BytesIO() as stream:
        img.save(stream, format="WEBP", quality=92)
        payload = base64.b64encode(stream.getvalue())
    return f"data:image/webp;base64,{payload.decode()}"
# Custom before/after comparison built on gr.HTML (gr.ImageSlider is broken with
# gr.Examples caching on this Gradio build and doesn't keep the two sides aligned).
# Markup/CSS mirror Gradio's native ImageSlider: both images fill the same box with
# object-fit:contain so they line up regardless of native size; the edited ("after")
# image is revealed by a clip-path driven by an accent-pill handle on a 1px divider.
# NOTE: Gradio evaluates html_template via `new Function(..., "return `" + tpl + "`")`,
# i.e. it wraps the whole template in backticks. So the template must NOT contain any
# backticks of its own (nested template literals terminate the wrapper and silently
# blank the component) — build the markup with single-quote string concatenation.
# Native-style floating block label (icon + text), mirroring Gradio's block-label.
# NOTE(review): the literal below is an empty string as written — confirm the
# label markup was not lost.
_BA_LABEL = (
    ''
)
# Presumably the icon markup for a download control (inferred from the name).
# NOTE(review): this literal is also an empty string as written — confirm.
_BA_DOWNLOAD_ICON = (
    ''
)
# value arrives as a JSON string (see edit()); parse it defensively. An IIFE keeps
# this a single ${...} expression with no backticks.
BA_HTML = (
"${(function(){\n"
" var d = {};\n"
" try { d = value ? JSON.parse(value) : {}; } catch (e) { d = {}; }\n"
" return (d && d.after)\n"
" ? '