import os import subprocess import sys # ZeroGPU: torch.compile / dynamo unsupported — disable before any torch import. os.environ["TORCH_COMPILE_DISABLE"] = "1" os.environ["TORCHDYNAMO_DISABLE"] = "1" # (removed runtime xformers install -> would pull torch 2.8 and break the AOTI .pt2; SDPA used) # --- clone + install the NATIVE LTX-2 codebase at the pinned commit the working ZeroGPU spaces use --- LTX_REPO_URL = "https://github.com/Lightricks/LTX-2.git" LTX_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "LTX-2") LTX_COMMIT = "ae855f8538843825f9015a419cf4ba5edaf5eec2" if not os.path.exists(LTX_REPO_DIR): subprocess.run(["git", "clone", LTX_REPO_URL, LTX_REPO_DIR], check=True) subprocess.run(["git", "-C", LTX_REPO_DIR, "checkout", LTX_COMMIT], check=True) subprocess.run([sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps", "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-core"), "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines")], check=True) sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src")) sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src")) import logging import random import tempfile import numpy as np import imageio.v3 as iio from PIL import Image, ImageOps import torch torch._dynamo.config.suppress_errors = True torch._dynamo.config.disable = True import spaces import gradio as gr from huggingface_hub import hf_hub_download, snapshot_download # Import LTX modules in the proven order — importing ltx_core.quantization/loader FIRST hits a # circular import (fp8_cast <-> loader.fuse_loras). Importing the model modules first forces the # correct init order (mirrors the working reference Space). from ltx_core.model.video_vae import TilingConfig, get_video_chunks_number, decode_video as _vae_decode_video # noqa: F401 from ltx_core.model.upsampler import upsample_video as _upsample_video # noqa: F401 from ltx_core.model.audio_vae import encode_audio as _vae_encode_audio # noqa: F401 from ltx_core.quantization import QuantizationPolicy from ltx_core.loader import LoraPathStrengthAndSDOps, LTXV_LORA_COMFY_RENAMING_MAP from ltx_pipelines.ic_lora import ICLoraPipeline from ltx_pipelines.utils.media_io import encode_video # --- ZeroGPU loader patch ------------------------------------------------------------- # The native loader opens safetensors directly on the CUDA device # (safe_open(path, device="cuda")), doing the host->device copy in safetensors' own C++ # (cudaMemcpy) — bypassing torch.Tensor.to, the call ZeroGPU patches to virtualise + pack # weights at module scope. Result: "No CUDA GPUs are available" at startup, nothing packs. # Patch it to open on CPU then move via torch.Tensor.to (ZeroGPU-virtualisable). import safetensors as _safetensors import ltx_core.loader.sft_loader as _sft from ltx_core.loader.primitives import StateDict as _StateDict def _zerogpu_safe_load(self, path, sd_ops, device=None): device = device or torch.device("cpu") sd, size, dtype = {}, 0, set() model_paths = path if isinstance(path, list) else [path] for shard_path in model_paths: with _safetensors.safe_open(shard_path, framework="pt", device="cpu") as f: for name in f.keys(): expected = name if sd_ops is None else sd_ops.apply_to_key(name) if expected is None: continue value = f.get_tensor(name).to(device=device) # torch path -> ZeroGPU-virtualised kvs = ((expected, value),) if sd_ops is not None: kvs = sd_ops.apply_to_key_value(expected, value) for k, v in kvs: size += v.nbytes dtype.add(v.dtype) sd[k] = v return _StateDict(sd=sd, device=device, size=size, dtype=dtype) _sft.SafetensorsStateDictLoader.load = _zerogpu_safe_load print("[PATCH] safetensors loader -> CPU-open + torch.to (ZeroGPU-virtualisable)") # -------------------------------------------------------------------------------------- # --- attention backend patch (FA3 crashes on Blackwell ZeroGPU; use xformers/SDPA) --- import torch.nn.functional as F from ltx_core.model.transformer import attention as _attn_mod def _sdpa_as_mea(query, key, value, attn_bias=None, scale=None, **kwargs): q, k, v = query.transpose(1, 2), key.transpose(1, 2), value.transpose(1, 2) return F.scaled_dot_product_attention(q, k, v, scale=scale).transpose(1, 2) # IMPORTANT (ZeroGPU): never query CUDA at module scope. SDPA works on every GPU (incl. # Blackwell ZeroGPU, where FA3 crashes), so patch it unconditionally. _attn_mod.memory_efficient_attention = _sdpa_as_mea print("[ATTN] SDPA (patched at module scope, no CUDA query)") logging.getLogger().setLevel(logging.INFO) # =========================== PER-LORA CONFIG (colorize) =========================== TITLE = "LTX-2.3 Add Water (native LTX-2)" LORA_REPO = "Lightricks/LTX-2.3-22b-IC-LoRA-Water-Simulation" LORA_FILE = "ltx-2.3-22b-ic-lora-water-simulation-0.9.safetensors" LORA_SCALE = 1.0 SKIP_STAGE_2 = True GRAYSCALE_REF = False RES_PRESETS = {"960×544 (fast)": (960, 544), "1216×704 (recommended)": (1216, 704)} DEFAULT_PRESET = "1216×704 (recommended)" FRAME_CHOICES = [49, 73, 97, 121] DEFAULT_FRAMES = 73 def build_prompt(p): return ( "Reference shows the dry scene. Edited shows the same scene with realistic, naturally-moving water added. " f"ADD WATER {p.strip()}. " "Subject identity, framing and motion are identical to the reference; only water is added." ) EXAMPLES = [ ["examples/landscape_dry.mp4", "a wide river flooding across the valley with glassy rippling reflections and drifting foam, mist rising; flowing water and a distant waterfall", "1216×704 (recommended)", 73, 42, False], ["examples/man_dancing_dry.mp4", "a clear shallow stream rushing and braiding around their legs with white foam and splashes; rushing water and rhythmic splashes", "1216×704 (recommended)", 73, 42, False], ] # ================================================================================= FPS = 24.0 MAX_SEED = np.iinfo(np.int32).max HF_TOKEN = os.environ.get("HF_TOKEN") LTX_MODEL_REPO = "Lightricks/LTX-2.3" GEMMA_REPO = "google/gemma-3-12b-it-qat-q4_0-unquantized" def _src_fps(path, default=FPS): try: return float(iio.immeta(path, plugin="pyav").get("fps", default)) or default except Exception: return default def _prep_reference(path, width, height, num_frames): """Resample to 24fps, aspect-fit/crop to WxH, NF frames; (optionally grayscale); write temp mp4.""" vid = iio.imread(path, plugin="pyav") src_fps = _src_fps(path) n = len(vid) out = [] for i in range(num_frames): idx = min(int(round(i / FPS * src_fps)), n - 1) im = Image.fromarray(vid[idx]).convert("RGB") im = ImageOps.fit(im, (width, height), Image.LANCZOS) if GRAYSCALE_REF: im = im.convert("L").convert("RGB") out.append(np.array(im)) tmp = tempfile.mktemp(suffix=".mp4") iio.imwrite(tmp, np.stack(out), fps=FPS, plugin="pyav", codec="libx264") return tmp def _pick_resolution(path, preset): w, h = RES_PRESETS[preset] try: f0 = iio.imread(path, plugin="pyav", index=0) if f0.shape[0] > f0.shape[1]: # portrait w, h = h, w except Exception: pass return w, h # --- Load native pipeline + IC-LoRA once at module scope (ZeroGPU packs weights here) --- print("Downloading checkpoints…") checkpoint_path = hf_hub_download(LTX_MODEL_REPO, "ltx-2.3-22b-distilled-1.1.safetensors", token=HF_TOKEN) spatial_upsampler_path = hf_hub_download(LTX_MODEL_REPO, "ltx-2.3-spatial-upscaler-x2-1.1.safetensors", token=HF_TOKEN) gemma_root = snapshot_download(GEMMA_REPO, token=HF_TOKEN) lora_path = hf_hub_download(LORA_REPO, LORA_FILE, token=HF_TOKEN) print("Building ICLoraPipeline…") pipeline = ICLoraPipeline( distilled_checkpoint_path=checkpoint_path, spatial_upsampler_path=spatial_upsampler_path, gemma_root=gemma_root, loras=[LoraPathStrengthAndSDOps(lora_path, LORA_SCALE, LTXV_LORA_COMFY_RENAMING_MAP)], # bf16 (NOT fp8): the IC-LoRA is fused into the transformer at MODULE SCOPE (the GPU # worker can't re-open the checkpoint file). fp8_cast()'s fusion runs a custom CUDA kernel # that can't be ZeroGPU-virtualised; the bf16 fuse rule is pure torch -> virtualisable. quantization=None, ) def _preload_pin(ledger, tag): if ledger is None: return for name in ["transformer", "video_encoder", "video_decoder", "audio_encoder", "audio_decoder", "vocoder", "spatial_upsampler", "text_encoder", "gemma_embeddings_processor"]: fn = getattr(ledger, name, None) if callable(fn): try: obj = fn() setattr(ledger, name, (lambda o=obj: o)) print(f"[preload {tag}] {name} ✓") except Exception as e: print(f"[preload {tag}] {name} skipped: {e}") # Preload stage 1 always; preload stage 2 only when two-stage is used (skip_stage_2=False). # Eagerly pinning both ledgers materializes TWO ~46GB transformers — too big for the ZeroGPU pack. _preload_pin(getattr(pipeline, "stage_1_model_ledger", None), "stage1") if not SKIP_STAGE_2: _preload_pin(getattr(pipeline, "stage_2_model_ledger", None), "stage2") print("Pipeline ready.") # ============================ AOTI (native bf16 transformer graph) ============================ AOTI_REPO = os.environ.get("AOTI_REPO", "linoyts/LTX-2.3-Native-Transformer-GroupA-sm120-cu130-r20") import types as _types from dataclasses import replace as _dc_replace from ltx_core.model.transformer.transformer_args import TransformerArgs as _TA _TA_FIELDS = list(_TA.__dataclass_fields__.keys()) def _flatten_ta(ta): out = [] for f in _TA_FIELDS: v = getattr(ta, f) if torch.is_tensor(v): out.append(v) elif isinstance(v, tuple) and len(v) > 0 and all(torch.is_tensor(x) for x in v): out.extend(v) return out def _install_aoti(): velocity = pipeline.stage_1_model_ledger.transformer().velocity_model spaces.aoti_load(module=velocity, repo_id=AOTI_REPO) def _proc(self, video, audio, perturbations): for blk in self.transformer_blocks: o = blk(*(_flatten_ta(video) + _flatten_ta(audio))) video = _dc_replace(video, x=o[0]); audio = _dc_replace(audio, x=o[1]) return video, audio velocity._process_transformer_blocks = _types.MethodType(_proc, velocity) print(f"[AOTI] loaded {AOTI_REPO} + patched block loop", flush=True) print(f"[AOTI] base torch={torch.__version__} cuda={torch.version.cuda}", flush=True) try: _install_aoti(); print("[AOTI] OK", flush=True) except Exception as _e: import traceback; traceback.print_exc(); print(f"[AOTI] FAILED ({_e!r}) -> EAGER", flush=True) # ============================================================================================== def _duration(*args, **kwargs): nf = next((a for a in args if isinstance(a, int) and a in FRAME_CHOICES), DEFAULT_FRAMES) return int(60 + nf * 1.2) @spaces.GPU(duration=_duration) @torch.inference_mode() def add_water(video, prompt, preset, num_frames, seed, randomize, progress=gr.Progress(track_tqdm=True)): if video is None: raise gr.Error("Please upload a video.") if not prompt.strip(): raise gr.Error("Describe the result (e.g. 'a brown rabbit on grey rocks, soft birdsong').") seed = random.randint(0, MAX_SEED) if randomize else int(seed) num_frames = int(num_frames) width, height = _pick_resolution(video, preset) ref_path = _prep_reference(video, width, height, num_frames) tiling = TilingConfig.default() # skip_stage_2 outputs at half the passed dims -> pass 2x so output matches the preset. gen_w, gen_h = (width * 2, height * 2) if SKIP_STAGE_2 else (width, height) video_out, audio_out = pipeline( prompt=build_prompt(prompt), seed=seed, height=gen_h, width=gen_w, num_frames=num_frames, frame_rate=FPS, images=[], video_conditioning=[(ref_path, 1.0)], skip_stage_2=SKIP_STAGE_2, tiling_config=tiling, ) out_path = tempfile.mktemp(suffix=".mp4") encode_video(video=video_out, fps=FPS, audio=audio_out, output_path=out_path, video_chunks_number=get_video_chunks_number(num_frames, tiling)) return out_path, seed # --- UI config (match the public Space exactly) --- RES_PRESETS = {"960×544 (fast)": (960, 544), "1216×704 (recommended)": (1216, 704), "1536×864 (high)": (1536, 864), "1920×1088 (native)": (1920, 1088)} FRAME_CHOICES = [49, 73, 97, 121] LORA_SCALE = 1.2 # native fixed scale (matches public default) with gr.Blocks(title="LTX-2.3 Water Simulation") as demo: gr.Markdown( "# 🌊 LTX-2.3 Water Simulation\n" "Add believable, naturally-moving water to a dry clip — rivers, surf, rain, waterfalls, floods, " "splashes — that interacts with the moving scene, while maintaining subject and framing identity. " "Using [LTX 2.3 Distilled](https://huggingface.co/Lightricks/LTX-2.3) with the " "[Water Simulation IC-LoRA](https://huggingface.co/Lightricks/LTX-2.3-22b-IC-LoRA-Water-Simulation)." ) gr.Markdown("⚡ **Accelerated with [AOTI](https://huggingface.co/linoyts/LTX-2.3-Native-Transformer-GroupA-sm120-cu130-r20)** — precompiled transformer for faster inference.") with gr.Row(): with gr.Column(): video_in = gr.Video(label="Dry input video") prompt = gr.Textbox( label="Describe the water — type, motion, how it interacts, plus any sounds", lines=3, placeholder="a clear shallow stream braiding around their legs with white foam crests and glistening wet ground; rushing water, gentle splashing", ) with gr.Accordion("Settings", open=False): preset = gr.Dropdown(list(RES_PRESETS), value="1216×704 (recommended)", label="Resolution") num_frames = gr.Dropdown(FRAME_CHOICES, value=73, label="Frames (24fps)") randomize = gr.Checkbox(True, label="Randomize seed") seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed") run = gr.Button("Add water", variant="primary") with gr.Column(): video_out = gr.Video(label="Result with water") run.click(add_water, inputs=[video_in, prompt, preset, num_frames, seed, randomize], outputs=[video_out, seed]) gr.Examples( examples=[ ['examples/man_dancing_dry.mp4', 'a clear, shallow stream rushing and braiding around their legs — cold mountain water swirling with white foam crests and glassy ripples, the wet floor glistening and throwing back reflections, bright droplets kicked up with every step; lively rushing water and rhythmic splashes as they move', '1216×704 (recommended)', 73, 42, False], ['examples/landscape_dry.mp4', 'a wide river flooding across the valley floor — the water spreading in glassy sheets with rippling reflections of the sky and drifting ribbons of white foam, a soft mist rising off the surface in the cool light; steadily flowing water and the distant rush of a waterfall', '1216×704 (recommended)', 73, 42, False], ], inputs=[video_in, prompt, preset, num_frames, seed, randomize], outputs=[video_out, seed], fn=add_water, cache_examples=True, cache_mode="lazy", ) if __name__ == "__main__": demo.launch(show_error=True)