Switch backend to native LTX-2 (ICLoraPipeline)

#2
by linoyts HF Staff - opened
Files changed (3) hide show
  1. README.md +3 -3
  2. app.py +223 -111
  3. requirements.txt +9 -7
README.md CHANGED
@@ -11,7 +11,7 @@ pinned: false
11
  hardware: zero-a10g
12
  short_description: Colorize B&W video with an LTX-2.3 IC-LoRA
13
  models:
14
- - diffusers/LTX-2.3-Distilled-Diffusers
15
  - Lightricks/LTX-2.3-22b-IC-LoRA-Colorization
16
  ---
17
 
@@ -21,6 +21,6 @@ Restores natural color to grayscale, monochrome, or desaturated video while keep
21
  subject identity, framing, and motion untouched — only color changes.
22
 
23
  Runs the **colorization IC-LoRA** from [`linoyts/LTX-2.3-loras`](https://huggingface.co/linoyts/LTX-2.3-loras)
24
- on the distilled [`diffusers/LTX-2.3-Distilled-Diffusers`](https://huggingface.co/diffusers/LTX-2.3-Distilled-Diffusers)
25
- checkpoint via `LTX2InContextPipeline`. Upload any clip — it's desaturated to a grayscale
26
  reference and recolored from your prompt. 8-step distilled schedule, LoRA scale 1.0.
 
11
  hardware: zero-a10g
12
  short_description: Colorize B&W video with an LTX-2.3 IC-LoRA
13
  models:
14
+ - Lightricks/LTX-2.3
15
  - Lightricks/LTX-2.3-22b-IC-LoRA-Colorization
16
  ---
17
 
 
21
  subject identity, framing, and motion untouched — only color changes.
22
 
23
  Runs the **colorization IC-LoRA** from [`Lightricks/LTX-2.3-22b-IC-LoRA-Colorization`](https://huggingface.co/Lightricks/LTX-2.3-22b-IC-LoRA-Colorization)
24
+ on the distilled [`Lightricks/LTX-2.3`](https://huggingface.co/Lightricks/LTX-2.3)
25
+ checkpoint via the native LTX-2 pipeline. Upload any clip — it's desaturated to a grayscale
26
  reference and recolored from your prompt. 8-step distilled schedule, LoRA scale 1.0.
app.py CHANGED
@@ -1,56 +1,147 @@
1
  import os
2
-
3
- # ZeroGPU: torch.compile / dynamo are unsupported — disable before torch import.
4
- os.environ.setdefault("TORCH_COMPILE_DISABLE", "1")
5
- os.environ.setdefault("TORCHDYNAMO_DISABLE", "1")
6
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import random
8
  import tempfile
9
- import threading
10
- import time
11
 
12
  import numpy as np
13
  import imageio.v3 as iio
14
- import spaces
15
- import torch
16
- import gradio as gr
17
  from PIL import Image, ImageOps
18
- from huggingface_hub import hf_hub_download
19
- from safetensors.torch import load_file
20
 
21
- from diffusers import LTX2InContextPipeline
22
- from diffusers.pipelines.ltx2.pipeline_ltx2_ic_lora import LTX2ReferenceCondition
23
- from diffusers.pipelines.ltx2.utils import DISTILLED_SIGMA_VALUES
24
- from diffusers.utils import load_video, encode_video
25
 
26
- # --- Config -----------------------------------------------------------------
27
- BASE_MODEL = "diffusers/LTX-2.3-Distilled-Diffusers"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  LORA_REPO = "Lightricks/LTX-2.3-22b-IC-LoRA-Colorization"
29
  LORA_FILE = "ltx-2.3-22b-ic-lora-colorization-0.9.safetensors"
30
  LORA_SCALE = 1.0
31
- FPS = 24
32
- NUM_STEPS = len(DISTILLED_SIGMA_VALUES) # 8-step distilled schedule
33
- MAX_SEED = np.iinfo(np.int32).max
34
- HF_TOKEN = os.environ.get("HF_TOKEN")
35
-
36
  RES_PRESETS = {"960×544 (recommended)": (960, 544), "768×448 (fast)": (768, 448)}
 
37
  FRAME_CHOICES = [49, 73, 97, 121]
 
38
 
39
- # --- Load pipeline once at module scope (ZeroGPU registers it) ---------------
40
- pipe = LTX2InContextPipeline.from_pretrained(BASE_MODEL, torch_dtype=torch.bfloat16)
41
- pipe.to("cuda")
42
- pipe.vae.enable_tiling()
 
 
 
 
43
 
44
- _lora_path = hf_hub_download(LORA_REPO, LORA_FILE, token=HF_TOKEN)
45
- pipe.load_lora_weights(load_file(_lora_path), adapter_name="colorize")
46
- pipe.fuse_lora(lora_scale=LORA_SCALE)
47
- pipe.unload_lora_weights()
48
- # AOTI: load precompiled transformer blocks at ROOT level (ZeroGPU loads on cuda at
49
- # module scope; do NOT lazy-load or move to cuda inside @spaces.GPU).
50
- spaces.aoti_load(module=pipe.transformer, repo_id="ltx-community/LTX-2.3-Transformer-GroupA-sm120-cu130-r9e")
 
 
 
 
 
 
 
 
51
 
52
 
53
- # --- Helpers ----------------------------------------------------------------
54
  def _src_fps(path, default=FPS):
55
  try:
56
  return float(iio.immeta(path, plugin="pyav").get("fps", default)) or default
@@ -58,105 +149,126 @@ def _src_fps(path, default=FPS):
58
  return default
59
 
60
 
61
- def _load_frames(path, num_frames, width, height):
62
- """Natural-speed (real-time at 24fps), aspect-preserving (center-crop) frames."""
63
- frames = load_video(path)
64
- if not frames:
65
- return []
66
- fps = _src_fps(path)
67
  out = []
68
  for i in range(num_frames):
69
- idx = int(round(i / FPS * fps))
70
- idx = min(idx, len(frames) - 1)
71
- out.append(ImageOps.fit(frames[idx].convert("RGB"), (width, height), Image.LANCZOS))
72
- return out
73
-
74
-
75
- def _to_grayscale(img):
76
- return img.convert("L").convert("RGB")
77
-
78
-
79
- def _pick_resolution(first_frame, preset):
 
80
  w, h = RES_PRESETS[preset]
81
- if first_frame.height > first_frame.width:
82
- w, h = h, w
 
 
 
 
83
  return w, h
84
 
85
 
86
- def _build_prompt(prompt):
87
- # COLORIZE convention; the user's single elaborate prompt (colors + any sounds) goes after the keyword.
88
- return (
89
- "Reference shows the same scene in high-contrast monochrome with soft natural daylight. "
90
- "Edited shows the same scene with natural colors restored. "
91
- f"COLORIZE {prompt.strip()}. "
92
- "Subject identity, framing, and background geometry are identical to the reference; "
93
- "only color information differs between reference and edited."
94
- )
95
-
96
-
97
- def _export(video_np, audio, path):
98
- kw = {}
99
- if audio is not None:
100
- kw = dict(audio=audio[0].float().cpu(), audio_sample_rate=pipe.vocoder.config.output_sampling_rate)
101
- encode_video(video_np, fps=FPS, output_path=path, **kw)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
 
104
  def _duration(*args, **kwargs):
105
- preset = next((a for a in args if isinstance(a, str) and a in RES_PRESETS), None)
106
- num_frames = next((a for a in args if isinstance(a, int) and a in FRAME_CHOICES), 121)
107
- w, h = RES_PRESETS.get(preset, (960, 544))
108
- per_frame = max(1.0, (w * h) / (768 * 448))
109
- return int(25 + int(num_frames) * per_frame * 0.65) # ~2.3x measured runtime
110
-
111
 
112
- # --- Inference --------------------------------------------------------------
113
 
114
  @spaces.GPU(duration=_duration)
115
- def colorize(video, prompt, preset, num_frames, seed, randomize,
116
- progress=gr.Progress(track_tqdm=True)):
117
  if video is None:
118
- raise gr.Error("Please upload a video to colorize.")
119
  if not prompt.strip():
120
- raise gr.Error("Describe the colorized scene (e.g. 'a brown rabbit on grey granite rocks, soft birdsong').")
121
-
122
- if randomize:
123
- seed = random.randint(0, MAX_SEED)
124
- seed = int(seed)
125
  num_frames = int(num_frames)
126
-
127
- probe = load_video(video)
128
- if not probe:
129
- raise gr.Error("Could not read any frames from that video.")
130
- width, height = _pick_resolution(probe[0], preset)
131
-
132
- ref = [_to_grayscale(f) for f in _load_frames(video, num_frames, width, height)]
133
- prompt = _build_prompt(prompt)
134
-
135
-
136
- video_out, audio_out = pipe(
137
- prompt=prompt,
138
- negative_prompt="",
139
- reference_conditions=[LTX2ReferenceCondition(frames=ref, strength=1.0)],
140
- reference_downscale_factor=1,
141
- width=width, height=height, num_frames=num_frames, frame_rate=FPS,
142
- num_inference_steps=NUM_STEPS, sigmas=DISTILLED_SIGMA_VALUES,
143
- guidance_scale=1.0, stg_scale=0.0, audio_guidance_scale=1.0, audio_stg_scale=0.0,
144
- generator=torch.Generator(device="cuda").manual_seed(seed),
145
- output_type="np", return_dict=False,
146
  )
147
-
148
- out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
149
- _export(video_out[0], audio_out, out_path)
150
  return out_path, seed
151
 
152
 
153
- # --- UI ---------------------------------------------------------------------
 
 
 
 
154
  with gr.Blocks(title="LTX-2.3 Colorize") as demo:
155
  gr.Markdown(
156
  "# 🎨 LTX-2.3 Video Colorization\n"
157
  "Restore natural color to black-and-white or desaturated footage while keeping subject, framing and "
158
- "motion identity. Using [LTX 2.3 Distilled](https://huggingface.co/diffusers/LTX-2.3-Distilled-Diffusers) "
159
- "with the [Colorization IC-LoRA](https://huggingface.co/Lightricks/LTX-2.3-22b-IC-LoRA-Colorization), via diffusers 🧨."
160
  )
161
  with gr.Row():
162
  with gr.Column():
 
1
  import os
2
+ import subprocess
3
+ import sys
4
+
5
+ # ZeroGPU: torch.compile / dynamo unsupported — disable before any torch import.
6
+ os.environ["TORCH_COMPILE_DISABLE"] = "1"
7
+ os.environ["TORCHDYNAMO_DISABLE"] = "1"
8
+
9
+ # memory-efficient attention
10
+ subprocess.run([sys.executable, "-m", "pip", "install", "xformers==0.0.32.post2", "--no-build-isolation"], check=False)
11
+
12
+ # --- clone + install the NATIVE LTX-2 codebase at the pinned commit the working ZeroGPU spaces use ---
13
+ LTX_REPO_URL = "https://github.com/Lightricks/LTX-2.git"
14
+ LTX_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "LTX-2")
15
+ LTX_COMMIT = "ae855f8538843825f9015a419cf4ba5edaf5eec2"
16
+ if not os.path.exists(LTX_REPO_DIR):
17
+ subprocess.run(["git", "clone", LTX_REPO_URL, LTX_REPO_DIR], check=True)
18
+ subprocess.run(["git", "-C", LTX_REPO_DIR, "checkout", LTX_COMMIT], check=True)
19
+ subprocess.run([sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps",
20
+ "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-core"),
21
+ "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines")], check=True)
22
+ sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src"))
23
+ sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src"))
24
+
25
+ import logging
26
  import random
27
  import tempfile
 
 
28
 
29
  import numpy as np
30
  import imageio.v3 as iio
 
 
 
31
  from PIL import Image, ImageOps
 
 
32
 
33
+ import torch
34
+ torch._dynamo.config.suppress_errors = True
35
+ torch._dynamo.config.disable = True
 
36
 
37
+ import spaces
38
+ import gradio as gr
39
+ from huggingface_hub import hf_hub_download, snapshot_download
40
+
41
+
42
+ # Import LTX modules in the proven order — importing ltx_core.quantization/loader FIRST hits a
43
+ # circular import (fp8_cast <-> loader.fuse_loras). Importing the model modules first forces the
44
+ # correct init order (mirrors the working reference Space).
45
+ from ltx_core.model.video_vae import TilingConfig, get_video_chunks_number, decode_video as _vae_decode_video # noqa: F401
46
+ from ltx_core.model.upsampler import upsample_video as _upsample_video # noqa: F401
47
+ from ltx_core.model.audio_vae import encode_audio as _vae_encode_audio # noqa: F401
48
+ from ltx_core.quantization import QuantizationPolicy
49
+ from ltx_core.loader import LoraPathStrengthAndSDOps, LTXV_LORA_COMFY_RENAMING_MAP
50
+ from ltx_pipelines.ic_lora import ICLoraPipeline
51
+ from ltx_pipelines.utils.media_io import encode_video
52
+
53
+ # --- ZeroGPU loader patch -------------------------------------------------------------
54
+ # The native loader opens safetensors directly on the CUDA device:
55
+ # safetensors.safe_open(path, framework="pt", device="cuda")
56
+ # which performs the host->device copy inside safetensors' own C++ (cudaMemcpy), BYPASSING
57
+ # torch.Tensor.to — the exact call ZeroGPU patches to virtualise + pack weights at module
58
+ # scope. The result: "No CUDA GPUs are available" at startup and nothing gets packed.
59
+ # Patch the loader to open on CPU and move via torch.Tensor.to (which ZeroGPU virtualises),
60
+ # so the module-scope preload packs correctly — matching diffusers / the reference Space.
61
+ import safetensors as _safetensors
62
+ import ltx_core.loader.sft_loader as _sft
63
+ from ltx_core.loader.primitives import StateDict as _StateDict
64
+
65
def _zerogpu_safe_load(self, path, sd_ops, device=None):
    """Load one or more safetensors shards into a ``_StateDict``.

    Every shard is opened on the CPU and each tensor is then moved with
    ``Tensor.to(device=...)`` rather than being opened on the target device.

    Args:
        path: A single shard path, or a list of shard paths.
        sd_ops: Optional key/value rewriting helper. When given,
            ``apply_to_key`` maps each stored key (a ``None`` result drops
            the tensor) and ``apply_to_key_value`` yields the final
            ``(key, tensor)`` pairs to store.
        device: Destination device; falls back to CPU when falsy.

    Returns:
        A ``_StateDict`` holding the tensors, the destination device, the
        summed ``nbytes`` of the stored tensors and the set of their dtypes.
    """
    target = device or torch.device("cpu")
    shards = path if isinstance(path, list) else [path]

    tensors = {}
    total_bytes = 0
    dtypes = set()

    for shard in shards:
        # Always open on the host; the device move happens through torch below.
        with _safetensors.safe_open(shard, framework="pt", device="cpu") as handle:
            for raw_key in handle.keys():
                mapped_key = sd_ops.apply_to_key(raw_key) if sd_ops is not None else raw_key
                if mapped_key is None:
                    continue  # key filtered out by sd_ops

                tensor = handle.get_tensor(raw_key).to(device=target)

                if sd_ops is None:
                    pairs = ((mapped_key, tensor),)
                else:
                    pairs = sd_ops.apply_to_key_value(mapped_key, tensor)

                for out_key, out_tensor in pairs:
                    total_bytes += out_tensor.nbytes
                    dtypes.add(out_tensor.dtype)
                    tensors[out_key] = out_tensor

    return _StateDict(sd=tensors, device=target, size=total_bytes, dtype=dtypes)
84
+
85
+ _sft.SafetensorsStateDictLoader.load = _zerogpu_safe_load
86
+ print("[PATCH] safetensors loader → CPU-open + torch.to (ZeroGPU-virtualisable)")
87
+ # --------------------------------------------------------------------------------------
88
+
89
+ # --- attention backend patch (FA3 crashes on Blackwell ZeroGPU; use xformers/SDPA) ---
90
+ import torch.nn.functional as F
91
+ from ltx_core.model.transformer import attention as _attn_mod
92
+
93
+ def _sdpa_as_mea(query, key, value, attn_bias=None, scale=None, **kwargs):
94
+ q, k, v = query.transpose(1, 2), key.transpose(1, 2), value.transpose(1, 2)
95
+ return F.scaled_dot_product_attention(q, k, v, scale=scale).transpose(1, 2)
96
+
97
+ # IMPORTANT (ZeroGPU): do NOT query CUDA at module scope. torch.cuda.get_device_capability()
98
+ # forces torch._C._cuda_init() in the GPU-less main process, which poisons ZeroGPU's CUDA
99
+ # virtualization — the module-scope model preload then fails with "No CUDA GPUs are available"
100
+ # and ZeroGPU can't pack the weights. SDPA works on every GPU (incl. Blackwell ZeroGPU, where
101
+ # FA3 crashes), so patch it unconditionally without ever touching torch.cuda here.
102
+ _attn_mod.memory_efficient_attention = _sdpa_as_mea
103
+ print("[ATTN] SDPA (patched at module scope, no CUDA query)")
104
+
105
+ logging.getLogger().setLevel(logging.INFO)
106
+
107
+ # =========================== PER-LORA CONFIG (colorize) ===========================
108
+ TITLE = "LTX-2.3 Colorize (native LTX-2)"
109
  LORA_REPO = "Lightricks/LTX-2.3-22b-IC-LoRA-Colorization"
110
  LORA_FILE = "ltx-2.3-22b-ic-lora-colorization-0.9.safetensors"
111
  LORA_SCALE = 1.0
112
+ SKIP_STAGE_2 = True # restoration LoRA: stage-1-only native hi-res (per card)
113
+ GRAYSCALE_REF = True # colorize conditions on a B&W reference
 
 
 
114
  RES_PRESETS = {"960×544 (recommended)": (960, 544), "768×448 (fast)": (768, 448)}
115
+ DEFAULT_PRESET = "960×544 (recommended)"
116
  FRAME_CHOICES = [49, 73, 97, 121]
117
+ DEFAULT_FRAMES = 121
118
 
119
def build_prompt(p):
    """Wrap the user's scene description in the COLORIZE prompt template.

    Args:
        p: Free-form description of the colorized scene.

    Returns:
        The full prompt string, with the description placed after the
        ``COLORIZE`` keyword.
    """
    # The template adds its own full stop, so trailing periods typed by the
    # user are removed to avoid "..".
    description = p.strip().rstrip(".").rstrip()
    return (
        "Reference shows the same scene in high-contrast monochrome with soft natural daylight. "
        "Edited shows the same scene with natural colors restored. "
        f"COLORIZE {description}. "
        "Subject identity, framing, and background geometry are identical to the reference; "
        "only color information differs between reference and edited."
    )
127
 
128
+ EXAMPLES = [
129
+ ["examples/rabbit_rocks_gray.mp4",
130
+ "a young brown cottontail rabbit with warm tan and grey-brown fur, a pale cream underside and soft pink inner ears, perched on weathered grey granite boulders flecked with green and ochre lichen in warm late-afternoon sun; gentle wind, distant birdsong and soft rustling grass",
131
+ "960×544 (recommended)", 121, 42, False],
132
+ ["examples/surfing_gray.mp4",
133
+ "a surfer in a black wetsuit riding a curling turquoise ocean wave, bright white foam spraying off the crest, deep blue sky and sunlit teal water; powerful ocean waves crashing, rushing water, wind and distant seagulls",
134
+ "960×544 (recommended)", 121, 42, False],
135
+ ]
136
+ # =================================================================================
137
+
138
+ FPS = 24.0
139
+ MAX_SEED = np.iinfo(np.int32).max
140
+ HF_TOKEN = os.environ.get("HF_TOKEN")
141
+ LTX_MODEL_REPO = "Lightricks/LTX-2.3"
142
+ GEMMA_REPO = "google/gemma-3-12b-it-qat-q4_0-unquantized"
143
 
144
 
 
145
  def _src_fps(path, default=FPS):
146
  try:
147
  return float(iio.immeta(path, plugin="pyav").get("fps", default)) or default
 
149
  return default
150
 
151
 
152
def _prep_reference(path, width, height, num_frames):
    """Build the conditioning clip and write it to a temporary mp4.

    The source is resampled to ``FPS``, each frame is aspect-fit/cropped to
    ``width`` x ``height``, and (when ``GRAYSCALE_REF`` is set) desaturated.

    Args:
        path: Path of the uploaded source video.
        width: Target frame width in pixels.
        height: Target frame height in pixels.
        num_frames: Number of frames to emit.

    Returns:
        Path of the temporary mp4. The caller owns the file.

    Raises:
        gr.Error: If no frames could be decoded from ``path``.
    """
    vid = iio.imread(path, plugin="pyav")
    n = len(vid)
    if n == 0:
        # Without this guard the clamp below yields index -1 and an opaque IndexError.
        raise gr.Error("Could not read any frames from that video.")
    src_fps = _src_fps(path)

    out = []
    for i in range(num_frames):
        # Map output frame i (at FPS) to the nearest source frame, clamped to the
        # last frame so short clips hold their final image.
        idx = min(int(round(i / FPS * src_fps)), n - 1)
        im = Image.fromarray(vid[idx]).convert("RGB")
        im = ImageOps.fit(im, (width, height), Image.LANCZOS)
        if GRAYSCALE_REF:
            im = im.convert("L").convert("RGB")
        out.append(np.array(im))

    # NamedTemporaryFile creates the file atomically; tempfile.mktemp only
    # returns a name and is deprecated because of the resulting race.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
        tmp = handle.name
    iio.imwrite(tmp, np.stack(out), fps=FPS, plugin="pyav", codec="libx264")
    return tmp
168
+
169
+
170
def _pick_resolution(path, preset):
    """Return ``(width, height)`` for ``preset``, swapped for portrait sources.

    Args:
        path: Path of the source video; only its first frame is read.
        preset: Key into ``RES_PRESETS``.

    Returns:
        The preset's ``(width, height)``, with the two values exchanged when
        the first frame is taller than it is wide.
    """
    w, h = RES_PRESETS[preset]
    try:
        f0 = iio.imread(path, plugin="pyav", index=0)
        if f0.shape[0] > f0.shape[1]:  # portrait
            w, h = h, w
    except Exception as exc:
        # Best-effort probe: keep the preset orientation, but leave a trace
        # instead of swallowing the failure silently.
        logging.warning("Could not probe %r for orientation (%s); using preset as-is.", path, exc)
    return w, h
179
 
180
 
181
+ # --- Load native pipeline + IC-LoRA once at module scope (ZeroGPU packs weights here) ---
182
+ print("Downloading checkpoints…")
183
+ checkpoint_path = hf_hub_download(LTX_MODEL_REPO, "ltx-2.3-22b-distilled-1.1.safetensors", token=HF_TOKEN)
184
+ spatial_upsampler_path = hf_hub_download(LTX_MODEL_REPO, "ltx-2.3-spatial-upscaler-x2-1.1.safetensors", token=HF_TOKEN)
185
+ gemma_root = snapshot_download(GEMMA_REPO, token=HF_TOKEN)
186
+ lora_path = hf_hub_download(LORA_REPO, LORA_FILE, token=HF_TOKEN)
187
+
188
+ print("Building ICLoraPipeline…")
189
+ pipeline = ICLoraPipeline(
190
+ distilled_checkpoint_path=checkpoint_path,
191
+ spatial_upsampler_path=spatial_upsampler_path,
192
+ gemma_root=gemma_root,
193
+ loras=[LoraPathStrengthAndSDOps(lora_path, LORA_SCALE, LTXV_LORA_COMFY_RENAMING_MAP)],
194
+ # bf16 (NOT fp8): the IC-LoRA must be fused into the transformer at MODULE SCOPE (the GPU
195
+ # worker can't re-open the checkpoint file). fp8_cast()'s fusion runs a custom CUDA kernel
196
+ # that can't be ZeroGPU-virtualised ("CUDA error: no CUDA-capable device"), but the bf16
197
+ # fuse rule is pure torch matmul/add → virtualisable + packable. ~53GB pack (fits H200).
198
+ quantization=None,
199
+ )
200
+
201
+
202
+ # All components (incl. the bf16-fused transformer) load + pin at MODULE SCOPE so ZeroGPU
203
+ # packs them (~53GB) and transfers them into each GPU worker — the worker can't re-open the
204
+ # checkpoint file, so nothing may be built there. The CPU-open loader patch above makes the
205
+ # host->device moves virtualisable; bf16 keeps the LoRA fusion virtualisable too.
206
+ def _preload_pin(ledger, tag):
207
+ if ledger is None:
208
+ return
209
+ for name in ["video_encoder", "video_decoder", "audio_encoder", "audio_decoder",
210
+ "vocoder", "spatial_upsampler", "text_encoder", "gemma_embeddings_processor",
211
+ "transformer"]:
212
+ fn = getattr(ledger, name, None)
213
+ if callable(fn):
214
+ try:
215
+ obj = fn()
216
+ setattr(ledger, name, (lambda o=obj: o))
217
+ print(f"[preload {tag}] {name} ✓")
218
+ except Exception as e:
219
+ print(f"[preload {tag}] {name} skipped: {e}")
220
+
221
+
222
+ _preload_pin(getattr(pipeline, "stage_1_model_ledger", None), "stage1")
223
+ if not SKIP_STAGE_2:
224
+ _preload_pin(getattr(pipeline, "stage_2_model_ledger", None), "stage2")
225
+ print("Pipeline ready (all components preloaded + pinned for ZeroGPU packing).")
226
 
227
 
228
  def _duration(*args, **kwargs):
229
+ nf = next((a for a in args if isinstance(a, int) and a in FRAME_CHOICES), DEFAULT_FRAMES)
230
+ return int(60 + nf * 1.2)
 
 
 
 
231
 
 
232
 
233
  @spaces.GPU(duration=_duration)
234
+ @torch.inference_mode()
235
+ def colorize(video, prompt, preset, num_frames, seed, randomize, progress=gr.Progress(track_tqdm=True)):
236
  if video is None:
237
+ raise gr.Error("Please upload a video.")
238
  if not prompt.strip():
239
+ raise gr.Error("Describe the result (e.g. 'a brown rabbit on grey rocks, soft birdsong').")
240
+ seed = random.randint(0, MAX_SEED) if randomize else int(seed)
 
 
 
241
  num_frames = int(num_frames)
242
+ width, height = _pick_resolution(video, preset)
243
+ ref_path = _prep_reference(video, width, height, num_frames)
244
+ tiling = TilingConfig.default()
245
+ # skip_stage_2 outputs at half the passed dims (height//2, width//2) — pass 2× so the
246
+ # final video matches the chosen preset. (Two-stage demos pass the preset directly.)
247
+ gen_w, gen_h = (width * 2, height * 2) if SKIP_STAGE_2 else (width, height)
248
+ video_out, audio_out = pipeline(
249
+ prompt=build_prompt(prompt),
250
+ seed=seed, height=gen_h, width=gen_w,
251
+ num_frames=num_frames, frame_rate=FPS,
252
+ images=[], video_conditioning=[(ref_path, 1.0)],
253
+ skip_stage_2=SKIP_STAGE_2, tiling_config=tiling,
 
 
 
 
 
 
 
 
254
  )
255
+ out_path = tempfile.mktemp(suffix=".mp4")
256
+ encode_video(video=video_out, fps=FPS, audio=audio_out, output_path=out_path,
257
+ video_chunks_number=get_video_chunks_number(num_frames, tiling))
258
  return out_path, seed
259
 
260
 
261
+ # --- UI config (match the public Space exactly) ---
262
+ RES_PRESETS = {"960×544 (recommended)": (960, 544), "768×448 (fast)": (768, 448)}
263
+ FRAME_CHOICES = [49, 73, 97, 121]
264
+
265
+
266
  with gr.Blocks(title="LTX-2.3 Colorize") as demo:
267
  gr.Markdown(
268
  "# 🎨 LTX-2.3 Video Colorization\n"
269
  "Restore natural color to black-and-white or desaturated footage while keeping subject, framing and "
270
+ "motion identity. Using [LTX 2.3 Distilled](https://huggingface.co/Lightricks/LTX-2.3) "
271
+ "with the [Colorization IC-LoRA](https://huggingface.co/Lightricks/LTX-2.3-22b-IC-LoRA-Colorization)."
272
  )
273
  with gr.Row():
274
  with gr.Column():
requirements.txt CHANGED
@@ -1,9 +1,11 @@
1
- git+https://github.com/huggingface/diffusers
2
- transformers
3
  accelerate
4
- peft
5
- safetensors
6
- sentencepiece
7
- imageio
8
- imageio-ffmpeg
9
  av
 
 
 
 
 
1
+ transformers==4.57.6
 
2
  accelerate
3
+ torch==2.8.0
4
+ torchaudio==2.8.0
5
+ einops
6
+ scipy
 
7
  av
8
+ scikit-image>=0.25.2
9
+ flashpack==0.1.2
10
+ imageio[ffmpeg]
11
+ pillow