Switch backend to native LTX-2 (ICLoraPipeline)

#2
by linoyts HF Staff - opened
Files changed (3) hide show
  1. README.md +3 -3
  2. app.py +220 -138
  3. requirements.txt +9 -7
README.md CHANGED
@@ -11,7 +11,7 @@ pinned: false
11
  hardware: zero-a10g
12
  short_description: Sharpen out-of-focus video with an LTX-2.3 IC-LoRA
13
  models:
14
- - diffusers/LTX-2.3-Distilled-Diffusers
15
  - Lightricks/LTX-2.3-22b-IC-LoRA-Deblur
16
  ---
17
 
@@ -22,5 +22,5 @@ clip and regenerating it in sharp focus, preserving subject, framing, and geomet
22
  Spatial defocus only — not motion blur.
23
 
24
  Runs the **deblur IC-LoRA** from [`linoyts/LTX-2.3-loras`](https://huggingface.co/linoyts/LTX-2.3-loras)
25
- on the distilled [`diffusers/LTX-2.3-Distilled-Diffusers`](https://huggingface.co/diffusers/LTX-2.3-Distilled-Diffusers)
26
- checkpoint via `LTX2InContextPipeline`. 8-step distilled schedule, LoRA scale 1.0 (tunable).
 
11
  hardware: zero-a10g
12
  short_description: Sharpen out-of-focus video with an LTX-2.3 IC-LoRA
13
  models:
14
+ - Lightricks/LTX-2.3
15
  - Lightricks/LTX-2.3-22b-IC-LoRA-Deblur
16
  ---
17
 
 
22
  Spatial defocus only — not motion blur.
23
 
24
  Runs the **deblur IC-LoRA** from [`linoyts/LTX-2.3-loras`](https://huggingface.co/linoyts/LTX-2.3-loras)
25
+ on the distilled [`Lightricks/LTX-2.3`](https://huggingface.co/Lightricks/LTX-2.3)
26
+ checkpoint via the native LTX-2 pipeline. 8-step distilled schedule, LoRA scale 1.0 (tunable).
app.py CHANGED
@@ -1,68 +1,140 @@
1
  import os
2
-
3
- # ZeroGPU: torch.compile / dynamo are unsupported — disable before torch import.
4
- os.environ.setdefault("TORCH_COMPILE_DISABLE", "1")
5
- os.environ.setdefault("TORCHDYNAMO_DISABLE", "1")
6
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import random
8
  import tempfile
9
- import threading
10
- import time
11
 
12
  import numpy as np
13
  import imageio.v3 as iio
14
- import spaces
15
- import torch
16
- import gradio as gr
17
  from PIL import Image, ImageOps
18
- from huggingface_hub import hf_hub_download
19
- from safetensors.torch import load_file
20
 
21
- from diffusers import LTX2InContextPipeline
22
- from diffusers.pipelines.ltx2.pipeline_ltx2_ic_lora import LTX2ReferenceCondition
23
- from diffusers.pipelines.ltx2.utils import DISTILLED_SIGMA_VALUES
24
- from diffusers.utils import load_video, encode_video
25
 
26
- # --- Config -----------------------------------------------------------------
27
- BASE_MODEL = "diffusers/LTX-2.3-Distilled-Diffusers"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  LORA_REPO = "Lightricks/LTX-2.3-22b-IC-LoRA-Deblur"
29
  LORA_FILE = "ltx-2.3-22b-ic-lora-deblur-0.9.safetensors"
30
  LORA_SCALE = 1.0
31
- FPS = 24
32
- NUM_STEPS = len(DISTILLED_SIGMA_VALUES) # 8-step distilled schedule
33
- MAX_SEED = np.iinfo(np.int32).max
34
- HF_TOKEN = os.environ.get("HF_TOKEN")
35
-
36
- RES_PRESETS = {"960Γ—544 (recommended)": (960, 544), "1216Γ—704 (high)": (1216, 704), "768Γ—448 (fast)": (768, 448)}
37
  FRAME_CHOICES = [49, 73, 97, 121]
 
38
 
39
- # --- Load pipeline once at module scope (ZeroGPU registers it) ---------------
40
- pipe = LTX2InContextPipeline.from_pretrained(BASE_MODEL, torch_dtype=torch.bfloat16)
41
- pipe.to("cuda")
42
- pipe.vae.enable_tiling()
43
-
44
- _lora_path = hf_hub_download(LORA_REPO, LORA_FILE, token=HF_TOKEN)
45
- pipe.load_lora_weights(load_file(_lora_path), adapter_name="deblur")
46
- pipe.fuse_lora(lora_scale=LORA_SCALE)
47
- pipe.unload_lora_weights()
48
- # AOTI: load precompiled blocks at ROOT level. LoRA is fused+unloaded so weight names
49
- # match the compiled constants. The scale is per-request, so adjust it by re-fusing the
50
- # *delta* (fuse_lora is additive) + re-snapshot, only when it changes.
51
- spaces.aoti_load(module=pipe.transformer, repo_id="ltx-community/LTX-2.3-Transformer-GroupA-sm120-cu130-r9e")
52
- _FUSED_SCALE = LORA_SCALE
53
- def _refuse(scale):
54
- global _FUSED_SCALE
55
- delta = scale - _FUSED_SCALE
56
- if delta == 0:
57
- return
58
- pipe.load_lora_weights(load_file(_lora_path), adapter_name="deblur")
59
- pipe.fuse_lora(lora_scale=delta)
60
- pipe.unload_lora_weights()
61
- spaces.aoti_load(module=pipe.transformer, repo_id="ltx-community/LTX-2.3-Transformer-GroupA-sm120-cu130-r9e")
62
- _FUSED_SCALE = scale
63
 
64
 
65
- # --- Helpers ----------------------------------------------------------------
66
  def _src_fps(path, default=FPS):
67
  try:
68
  return float(iio.immeta(path, plugin="pyav").get("fps", default)) or default
@@ -70,104 +142,122 @@ def _src_fps(path, default=FPS):
70
  return default
71
 
72
 
73
- def _load_frames(path, num_frames, width, height):
74
- """Natural-speed (real-time at 24fps), aspect-preserving (center-crop) frames."""
75
- frames = load_video(path)
76
- if not frames:
77
- return []
78
- fps = _src_fps(path)
79
  out = []
80
  for i in range(num_frames):
81
- idx = min(int(round(i / FPS * fps)), len(frames) - 1)
82
- out.append(ImageOps.fit(frames[idx].convert("RGB"), (width, height), Image.LANCZOS))
83
- return out
84
-
85
-
86
- def _pick_resolution(first_frame, preset):
 
 
 
 
 
 
87
  w, h = RES_PRESETS[preset]
88
- if first_frame.height > first_frame.width:
89
- w, h = h, w
 
 
 
 
90
  return w, h
91
 
92
 
93
- def _build_prompt(prompt):
94
- desc = prompt.strip() or "the scene"
95
- return (
96
- "Reference shows the same scene, heavily out of focus with soft defocused blur and no fine detail. "
97
- "Edited shows the same scene in sharp focus with crisp detail and clean edges. "
98
- f"DEBLUR {desc}. "
99
- "Subject identity, framing, and background geometry are identical to the reference; "
100
- "only focus and sharpness differ between reference and edited."
101
- )
102
-
103
-
104
- def _export(video_np, audio, path):
105
- kw = {}
106
- if audio is not None:
107
- kw = dict(audio=audio[0].float().cpu(), audio_sample_rate=pipe.vocoder.config.output_sampling_rate)
108
- encode_video(video_np, fps=FPS, output_path=path, **kw)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
 
111
  def _duration(*args, **kwargs):
112
- preset = next((a for a in args if isinstance(a, str) and a in RES_PRESETS), None)
113
- num_frames = next((a for a in args if isinstance(a, int) and a in FRAME_CHOICES), 121)
114
- w, h = RES_PRESETS.get(preset, (960, 544))
115
- per_frame = max(1.0, (w * h) / (768 * 448))
116
- return int(25 + int(num_frames) * per_frame * 0.65) # ~2.3x measured runtime
117
-
118
-
119
- # --- Inference --------------------------------------------------------------
120
 
121
 
122
  @spaces.GPU(duration=_duration)
123
- def deblur(video, prompt, preset, num_frames, lora_scale, seed, randomize,
124
- progress=gr.Progress(track_tqdm=True)):
125
  if video is None:
126
- raise gr.Error("Please upload an out-of-focus video to sharpen.")
127
-
128
- if randomize:
129
- seed = random.randint(0, MAX_SEED)
130
- seed = int(seed)
131
  num_frames = int(num_frames)
132
-
133
- probe = load_video(video)
134
- if not probe:
135
- raise gr.Error("Could not read any frames from that video.")
136
- width, height = _pick_resolution(probe[0], preset)
137
-
138
- ref = _load_frames(video, num_frames, width, height)
139
- _refuse(float(lora_scale))
140
- prompt = _build_prompt(prompt)
141
-
142
- def _cb(p, i, t, kw):
143
- progress((i + 1) / NUM_STEPS, desc=f"Deblurring β€” step {i + 1}/{NUM_STEPS}")
144
- return {}
145
-
146
- video_out, audio_out = pipe(
147
- prompt=prompt,
148
- negative_prompt="",
149
- reference_conditions=[LTX2ReferenceCondition(frames=ref, strength=1.0)],
150
- reference_downscale_factor=1,
151
- width=width, height=height, num_frames=num_frames, frame_rate=FPS,
152
- num_inference_steps=NUM_STEPS, sigmas=DISTILLED_SIGMA_VALUES,
153
- guidance_scale=1.0, stg_scale=0.0, audio_guidance_scale=1.0, audio_stg_scale=0.0,
154
- generator=torch.Generator(device="cuda").manual_seed(seed),
155
- output_type="np", return_dict=False,
156
- )
157
-
158
- out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
159
- _export(video_out[0], audio_out, out_path)
160
  return out_path, seed
161
 
162
 
163
- # --- UI ---------------------------------------------------------------------
 
 
 
 
164
  with gr.Blocks(title="LTX-2.3 Deblur") as demo:
165
  gr.Markdown(
166
  "# πŸ”Ž LTX-2.3 Video Deblur\n"
167
  "Restore sharpness to out-of-focus / defocused footage while keeping subject, framing and geometry "
168
  "identity (spatial defocus, not motion blur). Using "
169
- "[LTX 2.3 Distilled](https://huggingface.co/diffusers/LTX-2.3-Distilled-Diffusers) with the "
170
- "[Deblur IC-LoRA](https://huggingface.co/Lightricks/LTX-2.3-22b-IC-LoRA-Deblur), via diffusers 🧨."
171
  )
172
  with gr.Row():
173
  with gr.Column():
@@ -179,30 +269,22 @@ with gr.Blocks(title="LTX-2.3 Deblur") as demo:
179
  with gr.Accordion("Settings", open=False):
180
  preset = gr.Dropdown(list(RES_PRESETS), value="960Γ—544 (recommended)", label="Resolution")
181
  num_frames = gr.Dropdown(FRAME_CHOICES, value=121, label="Frames (24fps)")
182
- lora_scale = gr.Slider(0.5, 1.0, value=1.0, step=0.05,
183
- label="LoRA strength (lower if over-sharpened / haloing)")
184
  randomize = gr.Checkbox(True, label="Randomize seed")
185
  seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
186
  run = gr.Button("Deblur", variant="primary")
187
  with gr.Column():
188
  video_out = gr.Video(label="Sharpened result")
189
 
190
- run.click(deblur, inputs=[video_in, prompt, preset, num_frames, lora_scale, seed, randomize],
191
  outputs=[video_out, seed])
192
 
193
  gr.Examples(
194
  examples=[
195
- ["examples/man_laughing_blur.mp4",
196
- "a pin-sharp close-up portrait of a middle-aged man laughing warmly, deep smile lines crinkling around his eyes, individual strands of hair and fine stubble crisply resolved, soft natural window light modeling the texture of his skin with bright catchlights in his eyes; hearty, genuine laughter rising and falling, with a quiet intimate room ambience",
197
- "960Γ—544 (recommended)", 121, 1.0, 42, False],
198
- ["examples/slicing_veggie_blur.mp4",
199
- "a razor-sharp close-up of hands slicing a fresh green zucchini into thin even rounds on a pale wooden cutting board β€” the glossy green skin, the pale seeded interior, beads of water on the blade and the fine grain of the wood all crisply resolved, a stainless-steel knife edge glinting under warm kitchen light; crisp rhythmic chopping against the board and a gentle kitchen ambience",
200
- "960Γ—544 (recommended)", 121, 1.0, 42, False],
201
- ["examples/landscape_blur.mp4",
202
- "a pin-sharp misty green mountain landscape mirrored in calm still water β€” individual pines on the slopes, drifting layers of fog and crisp rippling reflections all resolving into clean detail under soft cool morning light; a gentle wind moving over the water, distant birdsong and the faint lap of ripples",
203
- "960Γ—544 (recommended)", 121, 1.0, 42, False],
204
  ],
205
- inputs=[video_in, prompt, preset, num_frames, lora_scale, seed, randomize],
206
  outputs=[video_out, seed], fn=deblur, cache_examples=True, cache_mode="lazy",
207
  )
208
 
 
1
  import os
2
+ import subprocess
3
+ import sys
4
+
5
# ZeroGPU: torch.compile / dynamo unsupported — disable before any torch import.
os.environ["TORCH_COMPILE_DISABLE"] = "1"
os.environ["TORCHDYNAMO_DISABLE"] = "1"

# memory-efficient attention (best effort: check=False, so a failed install does not abort startup)
subprocess.run(
    [sys.executable, "-m", "pip", "install", "xformers==0.0.32.post2", "--no-build-isolation"],
    check=False,
)

# --- clone + install the NATIVE LTX-2 codebase at the pinned commit the working ZeroGPU spaces use ---
LTX_REPO_URL = "https://github.com/Lightricks/LTX-2.git"
LTX_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "LTX-2")
LTX_COMMIT = "ae855f8538843825f9015a419cf4ba5edaf5eec2"
_LTX_CHECKOUT_CMD = ["git", "-C", LTX_REPO_DIR, "checkout", LTX_COMMIT]
if not os.path.exists(LTX_REPO_DIR):
    subprocess.run(["git", "clone", LTX_REPO_URL, LTX_REPO_DIR], check=True)
    subprocess.run(_LTX_CHECKOUT_CMD, check=True)
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps",
            "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-core"),
            "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines"),
        ],
        check=True,
    )
elif os.path.isdir(os.path.join(LTX_REPO_DIR, ".git")):
    # The directory survives a restart even if an earlier run died between `clone` and
    # `checkout`; re-pin so the sys.path entries below never expose an unpinned HEAD.
    # Checking out the commit HEAD is already on is a no-op.
    subprocess.run(_LTX_CHECKOUT_CMD, check=True)
# Make the sources importable even when the editable install was skipped (directory already present).
sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src"))
sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src"))
24
+
25
+ import logging
26
  import random
27
  import tempfile
 
 
28
 
29
  import numpy as np
30
  import imageio.v3 as iio
 
 
 
31
  from PIL import Image, ImageOps
 
 
32
 
33
+ import torch
34
+ torch._dynamo.config.suppress_errors = True
35
+ torch._dynamo.config.disable = True
 
36
 
37
+ import spaces
38
+ import gradio as gr
39
+ from huggingface_hub import hf_hub_download, snapshot_download
40
+
41
+ # Import LTX modules in the proven order — importing ltx_core.quantization/loader FIRST hits a
42
+ # circular import (fp8_cast <-> loader.fuse_loras). Importing the model modules first forces the
43
+ # correct init order (mirrors the working reference Space).
44
+ from ltx_core.model.video_vae import TilingConfig, get_video_chunks_number, decode_video as _vae_decode_video # noqa: F401
45
+ from ltx_core.model.upsampler import upsample_video as _upsample_video # noqa: F401
46
+ from ltx_core.model.audio_vae import encode_audio as _vae_encode_audio # noqa: F401
47
+ from ltx_core.quantization import QuantizationPolicy
48
+ from ltx_core.loader import LoraPathStrengthAndSDOps, LTXV_LORA_COMFY_RENAMING_MAP
49
+ from ltx_pipelines.ic_lora import ICLoraPipeline
50
+ from ltx_pipelines.utils.media_io import encode_video
51
+
52
+ # --- ZeroGPU loader patch -------------------------------------------------------------
53
+ # The native loader opens safetensors directly on the CUDA device
54
+ # (safe_open(path, device="cuda")), doing the host->device copy in safetensors' own C++
55
+ # (cudaMemcpy) — bypassing torch.Tensor.to, the call ZeroGPU patches to virtualise + pack
56
+ # weights at module scope. Result: "No CUDA GPUs are available" at startup, nothing packs.
57
+ # Patch it to open on CPU then move via torch.Tensor.to (ZeroGPU-virtualisable).
58
+ import safetensors as _safetensors
59
+ import ltx_core.loader.sft_loader as _sft
60
+ from ltx_core.loader.primitives import StateDict as _StateDict
61
+
62
def _zerogpu_safe_load(self, path, sd_ops, device=None):
    """Replacement for ``SafetensorsStateDictLoader.load`` that reads every shard on CPU.

    Each tensor is opened with ``device="cpu"`` and then moved with ``Tensor.to``, so the
    host-to-device copy goes through torch rather than through safetensors itself.

    Args:
        path: One shard path, or a list of shard paths.
        sd_ops: Optional key/value rewriting object. ``apply_to_key`` returning ``None``
            drops the tensor; ``apply_to_key_value`` yields the (key, tensor) pairs to store.
        device: Target device; falls back to CPU when falsy.

    Returns:
        A ``_StateDict`` holding the tensors, the target device, the summed ``nbytes`` and
        the set of dtypes encountered.
    """
    target = device or torch.device("cpu")
    shard_paths = path if isinstance(path, list) else [path]

    tensors = {}
    total_bytes = 0
    seen_dtypes = set()
    for shard in shard_paths:
        with _safetensors.safe_open(shard, framework="pt", device="cpu") as handle:
            for raw_key in handle.keys():
                mapped_key = raw_key if sd_ops is None else sd_ops.apply_to_key(raw_key)
                if mapped_key is None:
                    continue
                # torch path -> ZeroGPU-virtualised
                tensor = handle.get_tensor(raw_key).to(device=target)
                if sd_ops is None:
                    pairs = ((mapped_key, tensor),)
                else:
                    pairs = sd_ops.apply_to_key_value(mapped_key, tensor)
                for out_key, out_tensor in pairs:
                    total_bytes += out_tensor.nbytes
                    seen_dtypes.add(out_tensor.dtype)
                    tensors[out_key] = out_tensor
    return _StateDict(sd=tensors, device=target, size=total_bytes, dtype=seen_dtypes)
81
+
82
+ _sft.SafetensorsStateDictLoader.load = _zerogpu_safe_load
83
+ print("[PATCH] safetensors loader -> CPU-open + torch.to (ZeroGPU-virtualisable)")
84
+ # --------------------------------------------------------------------------------------
85
+
86
+ # --- attention backend patch (FA3 crashes on Blackwell ZeroGPU; use xformers/SDPA) ---
87
+ import torch.nn.functional as F
88
+ from ltx_core.model.transformer import attention as _attn_mod
89
+
90
+ def _sdpa_as_mea(query, key, value, attn_bias=None, scale=None, **kwargs):
91
+ q, k, v = query.transpose(1, 2), key.transpose(1, 2), value.transpose(1, 2)
92
+ return F.scaled_dot_product_attention(q, k, v, scale=scale).transpose(1, 2)
93
+
94
+ # IMPORTANT (ZeroGPU): never query CUDA at module scope. SDPA works on every GPU (incl.
95
+ # Blackwell ZeroGPU, where FA3 crashes), so patch it unconditionally.
96
+ _attn_mod.memory_efficient_attention = _sdpa_as_mea
97
+ print("[ATTN] SDPA (patched at module scope, no CUDA query)")
98
+
99
+ logging.getLogger().setLevel(logging.INFO)
100
+
101
+ # =========================== PER-LORA CONFIG (deblur) ===========================
102
+ TITLE = "LTX-2.3 Deblur (native LTX-2)"
103
  LORA_REPO = "Lightricks/LTX-2.3-22b-IC-LoRA-Deblur"
104
  LORA_FILE = "ltx-2.3-22b-ic-lora-deblur-0.9.safetensors"
105
  LORA_SCALE = 1.0
106
+ SKIP_STAGE_2 = True
107
+ GRAYSCALE_REF = False
108
+ RES_PRESETS = {"960Γ—544 (recommended)": (960, 544), "768Γ—448 (fast)": (768, 448)}
109
+ DEFAULT_PRESET = "960Γ—544 (recommended)"
 
 
110
  FRAME_CHOICES = [49, 73, 97, 121]
111
+ DEFAULT_FRAMES = 121
112
 
113
def build_prompt(p):
    """Wrap a scene description in the fixed reference/edited DEBLUR prompt template.

    Args:
        p: Free-text description of the sharpened scene. ``None``, empty or
            whitespace-only input falls back to "the scene" so the template never
            degenerates to "DEBLUR .".

    Returns:
        The full prompt string.
    """
    desc = (p or "").strip() or "the scene"
    return (
        "Reference shows the same scene, heavily out of focus with soft defocused blur and no fine detail. "
        "Edited shows the same scene in sharp focus with crisp detail and clean edges. "
        f"DEBLUR {desc}. "
        "Subject identity, framing and background geometry are identical to the reference; only focus and sharpness differ."
    )
120
+
121
+ EXAMPLES = [
122
+ ["examples/landscape_blur.mp4",
123
+ "a pin-sharp misty green mountain landscape mirrored in calm still water, crisp pines and rippling reflections; gentle wind over water and distant birdsong",
124
+ "960Γ—544 (recommended)", 121, 42, False],
125
+ ["examples/man_laughing_blur.mp4",
126
+ "a crisp close-up of a man laughing warmly, sharp detail in his skin, hair and eyes; hearty laughter and a quiet room ambience",
127
+ "960Γ—544 (recommended)", 121, 42, False],
128
+ ]
129
+ # =================================================================================
130
+
131
+ FPS = 24.0
132
+ MAX_SEED = np.iinfo(np.int32).max
133
+ HF_TOKEN = os.environ.get("HF_TOKEN")
134
+ LTX_MODEL_REPO = "Lightricks/LTX-2.3"
135
+ GEMMA_REPO = "google/gemma-3-12b-it-qat-q4_0-unquantized"
 
136
 
137
 
 
138
  def _src_fps(path, default=FPS):
139
  try:
140
  return float(iio.immeta(path, plugin="pyav").get("fps", default)) or default
 
142
  return default
143
 
144
 
145
def _prep_reference(path, width, height, num_frames):
    """Write a temporary mp4 holding the reference clip the pipeline is conditioned on.

    Source frames are sampled at the index ``round(i / FPS * src_fps)`` (clamped to the
    last frame), fitted to ``width`` x ``height`` with ``ImageOps.fit``, and converted to
    grayscale-as-RGB when ``GRAYSCALE_REF`` is set.

    Args:
        path: Source video path.
        width, height: Target frame size in pixels.
        num_frames: Number of frames to emit.

    Returns:
        Path of the written mp4. The caller owns the file and should delete it.

    Raises:
        gr.Error: If the source video contains no frames.
    """
    vid = iio.imread(path, plugin="pyav")
    n = len(vid)
    if n == 0:
        # Without this guard the clamp below indexes vid[-1] and fails with an opaque error.
        raise gr.Error("Could not read any frames from that video.")
    src_fps = _src_fps(path)

    out = []
    for i in range(num_frames):
        idx = min(int(round(i / FPS * src_fps)), n - 1)
        im = Image.fromarray(vid[idx]).convert("RGB")
        im = ImageOps.fit(im, (width, height), Image.LANCZOS)
        if GRAYSCALE_REF:
            im = im.convert("L").convert("RGB")
        out.append(np.array(im))

    # mktemp() only returns a name and is deprecated as race-prone; create the file
    # atomically and hand its path to the encoder instead.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
        tmp = handle.name
    iio.imwrite(tmp, np.stack(out), fps=FPS, plugin="pyav", codec="libx264")
    return tmp
161
+
162
+
163
def _pick_resolution(path, preset):
    """Return ``(width, height)`` for ``preset``, swapped when the video is portrait.

    The first frame is probed to compare its height and width. If the probe fails the
    preset's own orientation is kept (best effort), and the failure is logged rather
    than silently discarded.

    Args:
        path: Source video path.
        preset: Key into ``RES_PRESETS``.

    Returns:
        ``(width, height)`` tuple.

    Raises:
        KeyError: If ``preset`` is not a key of ``RES_PRESETS``.
    """
    w, h = RES_PRESETS[preset]
    try:
        f0 = iio.imread(path, plugin="pyav", index=0)
        if f0.shape[0] > f0.shape[1]:  # portrait
            w, h = h, w
    except Exception:
        logging.getLogger(__name__).warning(
            "Could not probe %r for orientation; keeping preset orientation.", path, exc_info=True
        )
    return w, h
172
 
173
 
174
# --- Load native pipeline + IC-LoRA once at module scope (ZeroGPU packs weights here) ---
print("Downloading checkpoints…")
checkpoint_path = hf_hub_download(
    LTX_MODEL_REPO,
    "ltx-2.3-22b-distilled-1.1.safetensors",
    token=HF_TOKEN,
)
spatial_upsampler_path = hf_hub_download(
    LTX_MODEL_REPO,
    "ltx-2.3-spatial-upscaler-x2-1.1.safetensors",
    token=HF_TOKEN,
)
gemma_root = snapshot_download(GEMMA_REPO, token=HF_TOKEN)
lora_path = hf_hub_download(LORA_REPO, LORA_FILE, token=HF_TOKEN)

print("Building ICLoraPipeline…")
# The deblur IC-LoRA is the only adapter, applied at the configured strength.
_deblur_lora = LoraPathStrengthAndSDOps(lora_path, LORA_SCALE, LTXV_LORA_COMFY_RENAMING_MAP)
pipeline = ICLoraPipeline(
    distilled_checkpoint_path=checkpoint_path,
    spatial_upsampler_path=spatial_upsampler_path,
    gemma_root=gemma_root,
    loras=[_deblur_lora],
    # bf16 (NOT fp8): the IC-LoRA is fused into the transformer at MODULE SCOPE (the GPU
    # worker can't re-open the checkpoint file). fp8_cast()'s fusion runs a custom CUDA kernel
    # that can't be ZeroGPU-virtualised; the bf16 fuse rule is pure torch -> virtualisable.
    quantization=None,
)
192
+
193
+
194
+ def _preload_pin(ledger, tag):
195
+ if ledger is None:
196
+ return
197
+ for name in ["transformer", "video_encoder", "video_decoder", "audio_encoder",
198
+ "audio_decoder", "vocoder", "spatial_upsampler", "text_encoder",
199
+ "gemma_embeddings_processor"]:
200
+ fn = getattr(ledger, name, None)
201
+ if callable(fn):
202
+ try:
203
+ obj = fn()
204
+ setattr(ledger, name, (lambda o=obj: o))
205
+ print(f"[preload {tag}] {name} βœ“")
206
+ except Exception as e:
207
+ print(f"[preload {tag}] {name} skipped: {e}")
208
+
209
+ # Preload stage 1 always; preload stage 2 only when two-stage is used (skip_stage_2=False).
210
+ # Eagerly pinning both ledgers materializes TWO ~46GB transformers — too big for the ZeroGPU pack.
211
+ _preload_pin(getattr(pipeline, "stage_1_model_ledger", None), "stage1")
212
+ if not SKIP_STAGE_2:
213
+ _preload_pin(getattr(pipeline, "stage_2_model_ledger", None), "stage2")
214
+ print("Pipeline ready.")
215
 
216
 
217
def _duration(*args, **kwargs):
    """Compute the ZeroGPU duration budget from the requested frame count.

    The first positional argument that is an ``int`` listed in ``FRAME_CHOICES`` is taken
    as the frame count; ``DEFAULT_FRAMES`` is used when none matches. Keyword arguments
    are accepted and ignored.

    Returns:
        ``int(60 + frames * 1.2)``.
    """
    frames = DEFAULT_FRAMES
    for candidate in args:
        if isinstance(candidate, int) and candidate in FRAME_CHOICES:
            frames = candidate
            break
    return int(60 + frames * 1.2)
 
 
 
 
 
 
220
 
221
 
222
@spaces.GPU(duration=_duration)
@torch.inference_mode()
def deblur(video, prompt, preset, num_frames, seed, randomize, progress=gr.Progress(track_tqdm=True)):
    """Run the IC-LoRA pipeline on an uploaded video and encode the result to mp4.

    Args:
        video: Path of the uploaded video, or ``None``.
        prompt: Description of the desired result; must be non-empty.
        preset: Key into ``RES_PRESETS``.
        num_frames: Number of frames to generate.
        seed: Seed used when ``randomize`` is false.
        randomize: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        progress: Gradio progress tracker (tqdm-driven).

    Returns:
        ``(out_path, seed)``: the encoded video path and the seed actually used.

    Raises:
        gr.Error: If no video was supplied or the prompt is empty.
    """
    if video is None:
        raise gr.Error("Please upload a video.")
    # `prompt or ""` so a None prompt gets the user-facing error, not an AttributeError.
    if not (prompt or "").strip():
        raise gr.Error("Describe the result (e.g. 'a brown rabbit on grey rocks, soft birdsong').")
    seed = random.randint(0, MAX_SEED) if randomize else int(seed)
    num_frames = int(num_frames)
    width, height = _pick_resolution(video, preset)
    ref_path = _prep_reference(video, width, height, num_frames)
    try:
        tiling = TilingConfig.default()
        # skip_stage_2 outputs at half the passed dims -> pass 2x so output matches the preset.
        gen_w, gen_h = (width * 2, height * 2) if SKIP_STAGE_2 else (width, height)
        video_out, audio_out = pipeline(
            prompt=build_prompt(prompt),
            seed=seed, height=gen_h, width=gen_w,
            num_frames=num_frames, frame_rate=FPS,
            images=[], video_conditioning=[(ref_path, 1.0)],
            skip_stage_2=SKIP_STAGE_2, tiling_config=tiling,
        )
        # mktemp() is deprecated as race-prone; reserve the output file atomically.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
            out_path = handle.name
        encode_video(video=video_out, fps=FPS, audio=audio_out, output_path=out_path,
                     video_chunks_number=get_video_chunks_number(num_frames, tiling))
    finally:
        # The reference clip is a per-request scratch file; drop it once encoding is done
        # (or has failed) so requests do not accumulate temp videos on disk.
        try:
            os.remove(ref_path)
        except OSError:
            pass
    return out_path, seed
247
 
248
 
249
+ # --- UI config (match the public Space exactly) ---
250
+ RES_PRESETS = {"960Γ—544 (recommended)": (960, 544), "1216Γ—704 (high)": (1216, 704), "768Γ—448 (fast)": (768, 448)}
251
+ FRAME_CHOICES = [49, 73, 97, 121]
252
+
253
+
254
  with gr.Blocks(title="LTX-2.3 Deblur") as demo:
255
  gr.Markdown(
256
  "# πŸ”Ž LTX-2.3 Video Deblur\n"
257
  "Restore sharpness to out-of-focus / defocused footage while keeping subject, framing and geometry "
258
  "identity (spatial defocus, not motion blur). Using "
259
+ "[LTX 2.3 Distilled](https://huggingface.co/Lightricks/LTX-2.3) with the "
260
+ "[Deblur IC-LoRA](https://huggingface.co/Lightricks/LTX-2.3-22b-IC-LoRA-Deblur)."
261
  )
262
  with gr.Row():
263
  with gr.Column():
 
269
  with gr.Accordion("Settings", open=False):
270
  preset = gr.Dropdown(list(RES_PRESETS), value="960Γ—544 (recommended)", label="Resolution")
271
  num_frames = gr.Dropdown(FRAME_CHOICES, value=121, label="Frames (24fps)")
 
 
272
  randomize = gr.Checkbox(True, label="Randomize seed")
273
  seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
274
  run = gr.Button("Deblur", variant="primary")
275
  with gr.Column():
276
  video_out = gr.Video(label="Sharpened result")
277
 
278
+ run.click(deblur, inputs=[video_in, prompt, preset, num_frames, seed, randomize],
279
  outputs=[video_out, seed])
280
 
281
  gr.Examples(
282
  examples=[
283
+ ['examples/man_laughing_blur.mp4', 'a pin-sharp close-up portrait of a middle-aged man laughing warmly, deep smile lines crinkling around his eyes, individual strands of hair and fine stubble crisply resolved, soft natural window light modeling the texture of his skin with bright catchlights in his eyes; hearty, genuine laughter rising and falling, with a quiet intimate room ambience', '960Γ—544 (recommended)', 121, 42, False],
284
+ ['examples/slicing_veggie_blur.mp4', 'a razor-sharp close-up of hands slicing a fresh green zucchini into thin even rounds on a pale wooden cutting board β€” the glossy green skin, the pale seeded interior, beads of water on the blade and the fine grain of the wood all crisply resolved, a stainless-steel knife edge glinting under warm kitchen light; crisp rhythmic chopping against the board and a gentle kitchen ambience', '960Γ—544 (recommended)', 121, 42, False],
285
+ ['examples/landscape_blur.mp4', 'a pin-sharp misty green mountain landscape mirrored in calm still water β€” individual pines on the slopes, drifting layers of fog and crisp rippling reflections all resolving into clean detail under soft cool morning light; a gentle wind moving over the water, distant birdsong and the faint lap of ripples', '960Γ—544 (recommended)', 121, 42, False],
 
 
 
 
 
 
286
  ],
287
+ inputs=[video_in, prompt, preset, num_frames, seed, randomize],
288
  outputs=[video_out, seed], fn=deblur, cache_examples=True, cache_mode="lazy",
289
  )
290
 
requirements.txt CHANGED
@@ -1,9 +1,11 @@
1
- git+https://github.com/huggingface/diffusers
2
- transformers
3
  accelerate
4
- peft
5
- safetensors
6
- sentencepiece
7
- imageio
8
- imageio-ffmpeg
9
  av
 
 
 
 
 
1
+ transformers==4.57.6
 
2
  accelerate
3
+ torch==2.8.0
4
+ torchaudio==2.8.0
5
+ einops
6
+ scipy
 
7
  av
8
+ scikit-image>=0.25.2
9
+ flashpack==0.1.2
10
+ imageio[ffmpeg]
11
+ pillow