Spaces:
Running on Zero
Running on Zero
fixes: natural-speed/aspect, audio, progress, prompts, examples
Browse files- app.py +79 -64
- examples/landscape_blur.mp4 +2 -2
- examples/man_laughing_blur.mp4 +2 -2
- examples/slicing_veggie_blur.mp4 +2 -2
app.py
CHANGED
|
@@ -8,10 +8,11 @@ import random
|
|
| 8 |
import tempfile
|
| 9 |
|
| 10 |
import numpy as np
|
|
|
|
| 11 |
import spaces
|
| 12 |
import torch
|
| 13 |
import gradio as gr
|
| 14 |
-
from PIL import Image
|
| 15 |
from huggingface_hub import hf_hub_download
|
| 16 |
from safetensors.torch import load_file
|
| 17 |
|
|
@@ -30,10 +31,7 @@ NUM_STEPS = len(DISTILLED_SIGMA_VALUES) # 8-step distilled schedule
|
|
| 30 |
MAX_SEED = np.iinfo(np.int32).max
|
| 31 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 32 |
|
| 33 |
-
RES_PRESETS = {
|
| 34 |
-
"Fast (768×448)": (768, 448),
|
| 35 |
-
"Quality (960×544)": (960, 544),
|
| 36 |
-
}
|
| 37 |
FRAME_CHOICES = [49, 73, 97, 121]
|
| 38 |
|
| 39 |
# --- Load pipeline once at module scope (ZeroGPU registers it) ---------------
|
|
@@ -47,40 +45,64 @@ pipe.set_adapters("deblur", LORA_SCALE)
|
|
| 47 |
|
| 48 |
|
| 49 |
# --- Helpers ----------------------------------------------------------------
|
| 50 |
-
def
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
w, h = RES_PRESETS[preset]
|
| 57 |
if first_frame.height > first_frame.width:
|
| 58 |
w, h = h, w
|
| 59 |
return w, h
|
| 60 |
|
| 61 |
|
| 62 |
-
def _build_prompt(scene
|
| 63 |
scene = scene.strip() or "the scene"
|
| 64 |
-
|
| 65 |
f"Reference shows {scene}, heavily out of focus with soft defocused blur and no fine detail. "
|
| 66 |
f"Edited shows the same scene in sharp focus with crisp detail and clean edges. "
|
| 67 |
f"DEBLUR {scene}. "
|
| 68 |
f"Subject identity, framing, and background geometry are identical to the reference; "
|
| 69 |
f"only focus and sharpness differ between reference and edited."
|
| 70 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
def _duration(*args, **kwargs):
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
num_frames = args[3] if len(args) > 3 else 73
|
| 77 |
per_frame = 1.6 if "Quality" in str(preset) else 1.0
|
| 78 |
-
return int(
|
| 79 |
|
| 80 |
|
| 81 |
# --- Inference --------------------------------------------------------------
|
| 82 |
@spaces.GPU(duration=_duration)
|
| 83 |
-
def deblur(video, scene, preset, num_frames, lora_scale, seed, randomize,
|
| 84 |
progress=gr.Progress(track_tqdm=True)):
|
| 85 |
if video is None:
|
| 86 |
raise gr.Error("Please upload an out-of-focus video to sharpen.")
|
|
@@ -88,43 +110,35 @@ def deblur(video, scene, preset, num_frames, lora_scale, seed, randomize,
|
|
| 88 |
if randomize:
|
| 89 |
seed = random.randint(0, MAX_SEED)
|
| 90 |
seed = int(seed)
|
| 91 |
-
|
| 92 |
-
frames = load_video(video)
|
| 93 |
-
if not frames:
|
| 94 |
-
raise gr.Error("Could not read any frames from that video.")
|
| 95 |
-
|
| 96 |
-
width, height = _pick_resolution(frames[0], preset)
|
| 97 |
num_frames = int(num_frames)
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
| 101 |
|
|
|
|
| 102 |
pipe.set_adapters("deblur", float(lora_scale))
|
| 103 |
-
prompt = _build_prompt(scene)
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
video_out,
|
| 107 |
prompt=prompt,
|
| 108 |
negative_prompt="",
|
| 109 |
-
reference_conditions=[
|
| 110 |
reference_downscale_factor=1,
|
| 111 |
-
width=width,
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
frame_rate=FPS,
|
| 115 |
-
num_inference_steps=NUM_STEPS,
|
| 116 |
-
sigmas=DISTILLED_SIGMA_VALUES,
|
| 117 |
-
guidance_scale=1.0,
|
| 118 |
-
stg_scale=0.0,
|
| 119 |
-
audio_guidance_scale=1.0,
|
| 120 |
-
audio_stg_scale=0.0,
|
| 121 |
generator=torch.Generator(device="cuda").manual_seed(seed),
|
| 122 |
-
output_type="np",
|
| 123 |
-
return_dict=False,
|
| 124 |
)
|
| 125 |
|
| 126 |
out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
|
| 127 |
-
|
| 128 |
return out_path, seed
|
| 129 |
|
| 130 |
|
|
@@ -132,20 +146,18 @@ def deblur(video, scene, preset, num_frames, lora_scale, seed, randomize,
|
|
| 132 |
with gr.Blocks(title="LTX-2.3 Deblur") as demo:
|
| 133 |
gr.Markdown(
|
| 134 |
"# 🔎 LTX-2.3 Video Deblur\n"
|
| 135 |
-
"Restore sharpness to out-of-focus / defocused footage while keeping subject, "
|
| 136 |
-
"
|
| 137 |
-
"
|
| 138 |
-
"IC-LoRA: [`linoyts/LTX-2.3-loras`](https://huggingface.co/linoyts/LTX-2.3-loras) · "
|
| 139 |
-
"base: distilled LTX-2.3."
|
| 140 |
)
|
| 141 |
with gr.Row():
|
| 142 |
with gr.Column():
|
| 143 |
video_in = gr.Video(label="Out-of-focus video")
|
| 144 |
-
scene = gr.Textbox(
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
)
|
| 149 |
with gr.Accordion("Settings", open=False):
|
| 150 |
preset = gr.Dropdown(list(RES_PRESETS), value="Fast (768×448)", label="Resolution")
|
| 151 |
num_frames = gr.Dropdown(FRAME_CHOICES, value=73, label="Frames (24fps)")
|
|
@@ -158,23 +170,26 @@ with gr.Blocks(title="LTX-2.3 Deblur") as demo:
|
|
| 158 |
video_out = gr.Video(label="Sharpened result")
|
| 159 |
used_seed = gr.Number(label="Seed used", interactive=False)
|
| 160 |
|
| 161 |
-
run.click(
|
| 162 |
-
|
| 163 |
-
inputs=[video_in, scene, preset, num_frames, lora_scale, seed, randomize],
|
| 164 |
-
outputs=[video_out, used_seed],
|
| 165 |
-
)
|
| 166 |
|
| 167 |
gr.Examples(
|
| 168 |
examples=[
|
| 169 |
-
["examples/man_laughing_blur.mp4",
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
],
|
| 173 |
-
inputs=[video_in, scene, preset, num_frames, lora_scale, seed, randomize],
|
| 174 |
-
outputs=[video_out, used_seed],
|
| 175 |
-
fn=deblur,
|
| 176 |
-
cache_examples=True,
|
| 177 |
-
cache_mode="lazy",
|
| 178 |
)
|
| 179 |
|
| 180 |
if __name__ == "__main__":
|
|
|
|
| 8 |
import tempfile
|
| 9 |
|
| 10 |
import numpy as np
|
| 11 |
+
import imageio.v3 as iio
|
| 12 |
import spaces
|
| 13 |
import torch
|
| 14 |
import gradio as gr
|
| 15 |
+
from PIL import Image, ImageOps
|
| 16 |
from huggingface_hub import hf_hub_download
|
| 17 |
from safetensors.torch import load_file
|
| 18 |
|
|
|
|
| 31 |
MAX_SEED = np.iinfo(np.int32).max
|
| 32 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 33 |
|
| 34 |
+
RES_PRESETS = {"Fast (768×448)": (768, 448), "Quality (960×544)": (960, 544)}
|
|
|
|
|
|
|
|
|
|
| 35 |
FRAME_CHOICES = [49, 73, 97, 121]
|
| 36 |
|
| 37 |
# --- Load pipeline once at module scope (ZeroGPU registers it) ---------------
|
|
|
|
| 45 |
|
| 46 |
|
| 47 |
# --- Helpers ----------------------------------------------------------------
|
| 48 |
+
def _src_fps(path, default=FPS):
    """Return the source video's frame rate, or ``default`` if it is unusable.

    The rate is taken from the ``"fps"`` entry of the metadata that imageio's
    ``pyav`` plugin reports for ``path``. Probing is best-effort: any failure
    while reading the metadata, or a value that is missing, zero, negative or
    non-finite, yields ``default`` instead.

    Args:
        path: Location of the video file to probe.
        default: Frame rate to fall back on.

    Returns:
        The probed frame rate as a float, or ``default``.
    """
    import math  # local import: only this helper needs it

    try:
        fps = float(iio.immeta(path, plugin="pyav").get("fps", default))
    except Exception:
        # Deliberately broad: unreadable metadata must not abort the request.
        return default
    # NaN/inf are truthy and would otherwise reach the frame-index arithmetic;
    # a negative rate would turn into negative (wrap-around) frame indices.
    if not math.isfinite(fps) or fps <= 0:
        return default
    return fps
|
| 53 |
|
| 54 |
|
| 55 |
+
def _load_frames(path, num_frames, width, height):
    """Sample ``num_frames`` frames from ``path`` at real-time pace for FPS playback.

    Output frame ``i`` is taken from the source frame nearest to time
    ``i / FPS`` seconds (using the source's own frame rate), clamped to the
    last available frame. Each picked frame is converted to RGB and
    center-cropped/resized to ``(width, height)`` with Lanczos resampling, so
    the aspect ratio is preserved.

    Returns:
        A list of ``num_frames`` images, or an empty list when the video
        yields no frames.
    """
    source = load_video(path)
    if not source:
        return []

    source_fps = _src_fps(path)
    last_index = len(source) - 1
    target_size = (width, height)

    return [
        ImageOps.fit(
            source[min(int(round(step / FPS * source_fps)), last_index)].convert("RGB"),
            target_size,
            Image.LANCZOS,
        )
        for step in range(num_frames)
    ]
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _pick_resolution(first_frame, preset):
    """Return the ``(width, height)`` of ``preset``, flipped for portrait clips.

    Args:
        first_frame: An image exposing ``width`` and ``height``; it decides
            the orientation.
        preset: A key of ``RES_PRESETS``.

    Returns:
        The preset's two dimensions as stored, or swapped when
        ``first_frame`` is taller than it is wide.
    """
    preset_w, preset_h = RES_PRESETS[preset]
    is_portrait = first_frame.height > first_frame.width
    return (preset_h, preset_w) if is_portrait else (preset_w, preset_h)
|
| 73 |
|
| 74 |
|
| 75 |
+
def _build_prompt(scene, audio):
|
| 76 |
scene = scene.strip() or "the scene"
|
| 77 |
+
p = (
|
| 78 |
f"Reference shows {scene}, heavily out of focus with soft defocused blur and no fine detail. "
|
| 79 |
f"Edited shows the same scene in sharp focus with crisp detail and clean edges. "
|
| 80 |
f"DEBLUR {scene}. "
|
| 81 |
f"Subject identity, framing, and background geometry are identical to the reference; "
|
| 82 |
f"only focus and sharpness differ between reference and edited."
|
| 83 |
)
|
| 84 |
+
if audio.strip():
|
| 85 |
+
p += f" Audio: {audio.strip()}."
|
| 86 |
+
return p
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _export(video_np, audio, path):
    """Encode ``video_np`` to ``path`` at FPS, muxing in ``audio`` when present.

    Args:
        video_np: Frames to encode, passed straight to ``encode_video``.
        audio: ``None`` for a silent file; otherwise its first element is
            cast to float, moved to the CPU and written as the audio track
            (presumably a batched torch tensor; confirm against the pipeline).
        path: Destination file path.
    """
    audio_kwargs = {}
    if audio is not None:
        audio_kwargs["audio"] = audio[0].float().cpu()
        # The sample rate comes from the pipeline's vocoder configuration.
        audio_kwargs["audio_sample_rate"] = pipe.vocoder.config.output_sampling_rate
    encode_video(video_np, fps=FPS, output_path=path, **audio_kwargs)
|
| 94 |
|
| 95 |
|
| 96 |
def _duration(*args, **kwargs):
    """Estimate the GPU time budget for one ``deblur`` call.

    Used as the ``duration=`` callable of ``spaces.GPU`` and is handed the
    same arguments as ``deblur``. Rather than relying on argument positions,
    the preset and frame count are recognised by value: the first argument
    that is a key of ``RES_PRESETS`` and the first that is a member of
    ``FRAME_CHOICES``. Explicit ``preset=`` / ``num_frames=`` keywords are
    checked before the positional arguments.

    Returns:
        An int budget (presumably seconds; confirm against the ZeroGPU
        docs): a fixed 60 plus a per-frame cost of 1.6 for a "Quality"
        preset and 1.0 otherwise. Defaults to a "Fast" preset and 73 frames
        when nothing matches.
    """
    preset = next(
        (
            a
            for a in (kwargs.get("preset"), *args)
            # Only strings can be preset keys; the isinstance guard also
            # keeps unhashable arguments away from the dict membership test.
            if isinstance(a, str) and a in RES_PRESETS
        ),
        "Fast",
    )
    num_frames = next(
        (a for a in (kwargs.get("num_frames"), *args) if a in FRAME_CHOICES),
        73,
    )
    per_frame = 1.6 if "Quality" in str(preset) else 1.0
    return int(60 + int(num_frames) * per_frame)
|
| 101 |
|
| 102 |
|
| 103 |
# --- Inference --------------------------------------------------------------
|
| 104 |
@spaces.GPU(duration=_duration)
|
| 105 |
+
def deblur(video, scene, audio, preset, num_frames, lora_scale, seed, randomize,
|
| 106 |
progress=gr.Progress(track_tqdm=True)):
|
| 107 |
if video is None:
|
| 108 |
raise gr.Error("Please upload an out-of-focus video to sharpen.")
|
|
|
|
| 110 |
if randomize:
|
| 111 |
seed = random.randint(0, MAX_SEED)
|
| 112 |
seed = int(seed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
num_frames = int(num_frames)
|
| 114 |
|
| 115 |
+
probe = load_video(video)
|
| 116 |
+
if not probe:
|
| 117 |
+
raise gr.Error("Could not read any frames from that video.")
|
| 118 |
+
width, height = _pick_resolution(probe[0], preset)
|
| 119 |
|
| 120 |
+
ref = _load_frames(video, num_frames, width, height)
|
| 121 |
pipe.set_adapters("deblur", float(lora_scale))
|
| 122 |
+
prompt = _build_prompt(scene, audio)
|
| 123 |
+
|
| 124 |
+
def _cb(p, i, t, kw):
|
| 125 |
+
progress((i + 1) / NUM_STEPS, desc=f"Deblurring — step {i + 1}/{NUM_STEPS}")
|
| 126 |
+
return {}
|
| 127 |
|
| 128 |
+
video_out, audio_out = pipe(
|
| 129 |
prompt=prompt,
|
| 130 |
negative_prompt="",
|
| 131 |
+
reference_conditions=[LTX2ReferenceCondition(frames=ref, strength=1.0)],
|
| 132 |
reference_downscale_factor=1,
|
| 133 |
+
width=width, height=height, num_frames=num_frames, frame_rate=FPS,
|
| 134 |
+
num_inference_steps=NUM_STEPS, sigmas=DISTILLED_SIGMA_VALUES,
|
| 135 |
+
guidance_scale=1.0, stg_scale=0.0, audio_guidance_scale=1.0, audio_stg_scale=0.0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
generator=torch.Generator(device="cuda").manual_seed(seed),
|
| 137 |
+
output_type="np", return_dict=False, callback_on_step_end=_cb,
|
|
|
|
| 138 |
)
|
| 139 |
|
| 140 |
out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
|
| 141 |
+
_export(video_out[0], audio_out, out_path)
|
| 142 |
return out_path, seed
|
| 143 |
|
| 144 |
|
|
|
|
| 146 |
with gr.Blocks(title="LTX-2.3 Deblur") as demo:
|
| 147 |
gr.Markdown(
|
| 148 |
"# 🔎 LTX-2.3 Video Deblur\n"
|
| 149 |
+
"Restore sharpness to out-of-focus / defocused footage while keeping subject, framing, and scene "
|
| 150 |
+
"geometry intact. (Spatial defocus only — not motion blur.) Optionally describe the soundscape and "
|
| 151 |
+
"the model generates matching audio. "
|
| 152 |
+
"IC-LoRA: [`linoyts/LTX-2.3-loras`](https://huggingface.co/linoyts/LTX-2.3-loras) · base: distilled LTX-2.3."
|
|
|
|
| 153 |
)
|
| 154 |
with gr.Row():
|
| 155 |
with gr.Column():
|
| 156 |
video_in = gr.Video(label="Out-of-focus video")
|
| 157 |
+
scene = gr.Textbox(label="Scene description (optional — the clip does most of the work)", lines=2,
|
| 158 |
+
placeholder="a city street at dusk with passing cars and neon signs")
|
| 159 |
+
audio = gr.Textbox(label="Sound / audio (optional)", lines=1,
|
| 160 |
+
placeholder="city ambience, passing cars, distant chatter")
|
|
|
|
| 161 |
with gr.Accordion("Settings", open=False):
|
| 162 |
preset = gr.Dropdown(list(RES_PRESETS), value="Fast (768×448)", label="Resolution")
|
| 163 |
num_frames = gr.Dropdown(FRAME_CHOICES, value=73, label="Frames (24fps)")
|
|
|
|
| 170 |
video_out = gr.Video(label="Sharpened result")
|
| 171 |
used_seed = gr.Number(label="Seed used", interactive=False)
|
| 172 |
|
| 173 |
+
run.click(deblur, inputs=[video_in, scene, audio, preset, num_frames, lora_scale, seed, randomize],
|
| 174 |
+
outputs=[video_out, used_seed])
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
gr.Examples(
|
| 177 |
examples=[
|
| 178 |
+
["examples/man_laughing_blur.mp4",
|
| 179 |
+
"a close-up portrait of a man laughing warmly, his face crinkled with joy, soft natural light on his skin and hair",
|
| 180 |
+
"warm hearty laughter, a quiet room ambience",
|
| 181 |
+
"Fast (768×448)", 73, 1.0, 42, False],
|
| 182 |
+
["examples/slicing_veggie_blur.mp4",
|
| 183 |
+
"hands slicing fresh green zucchini into thin rounds on a wooden cutting board, crisp vegetable detail and a glinting knife edge",
|
| 184 |
+
"crisp rhythmic chopping on a wooden board, gentle kitchen ambience",
|
| 185 |
+
"Fast (768×448)", 73, 1.0, 42, False],
|
| 186 |
+
["examples/landscape_blur.mp4",
|
| 187 |
+
"a misty green mountain landscape over calm still water, fine detail in the trees and rippling reflections",
|
| 188 |
+
"gentle wind over water, distant birdsong",
|
| 189 |
+
"Fast (768×448)", 73, 1.0, 42, False],
|
| 190 |
],
|
| 191 |
+
inputs=[video_in, scene, audio, preset, num_frames, lora_scale, seed, randomize],
|
| 192 |
+
outputs=[video_out, used_seed], fn=deblur, cache_examples=True, cache_mode="lazy",
|
|
|
|
|
|
|
|
|
|
| 193 |
)
|
| 194 |
|
| 195 |
if __name__ == "__main__":
|
examples/landscape_blur.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a68dbd1095dccb3a3d1e9f4f9e7191820f98eaa7de18f373043afc896946624c
|
| 3 |
+
size 312352
|
examples/man_laughing_blur.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ed0460ba306b13a815c7bb56eeb7a84eb824d351b5a6ef3f9848c7b76ce968a
|
| 3 |
+
size 345836
|
examples/slicing_veggie_blur.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19edbb8a77b03578b02dfa53f2b32f945120e6554def62511787004d7d9123c9
|
| 3 |
+
size 347929
|