Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
Wan2.2-S2V Avatar — รูป + เสียง → วิดีโอคนพูด (lip-sync) บน ZeroGPU (H200)
|
| 4 |
ใช้ diffusers fork (WanSpeechToVideoPipeline จาก PR #12258 ของ tolgacangoz)
|
| 5 |
"""
|
| 6 |
-
import os, tempfile, math
|
| 7 |
import torch
|
| 8 |
import gradio as gr
|
| 9 |
import spaces
|
|
@@ -41,6 +41,39 @@ def _snap_size(img, target_area=480 * 832, divisor=64, max_side=1280):
|
|
| 41 |
return nh, nw
|
| 42 |
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
@spaces.GPU(duration=300)
|
| 45 |
def generate(image_path, audio_path, prompt, negative_prompt, steps, guidance, res_area, seed,
|
| 46 |
progress=gr.Progress(track_tqdm=True)):
|
|
@@ -82,6 +115,11 @@ with gr.Blocks(title="Wan2.2-S2V Avatar", theme=gr.themes.Soft()) as demo:
|
|
| 82 |
with gr.Column():
|
| 83 |
image = gr.Image(type="filepath", label="ภาพ avatar (หน้าตรงชัด คน/การ์ตูน)")
|
| 84 |
audio = gr.Audio(type="filepath", label="ไฟล์เสียงพูด (อัปโหลด/อัดเสียง)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
prompt = gr.Textbox(label="Prompt (บรรยายฉาก/ท่าทาง)",
|
| 86 |
value="a person talking to the camera, natural expression, slight head movement")
|
| 87 |
with gr.Accordion("⚙️ ตั้งค่าขั้นสูง", open=False):
|
|
@@ -94,6 +132,7 @@ with gr.Blocks(title="Wan2.2-S2V Avatar", theme=gr.themes.Soft()) as demo:
|
|
| 94 |
with gr.Column():
|
| 95 |
out_video = gr.Video(label="ผลลัพธ์ (วิดีโอพูด)")
|
| 96 |
btn.click(generate, [image, audio, prompt, negative_prompt, steps, guidance, res_area, seed], out_video)
|
|
|
|
| 97 |
|
| 98 |
if __name__ == "__main__":
|
| 99 |
demo.queue().launch()
|
|
|
|
| 3 |
Wan2.2-S2V Avatar — รูป + เสียง → วิดีโอคนพูด (lip-sync) บน ZeroGPU (H200)
|
| 4 |
ใช้ diffusers fork (WanSpeechToVideoPipeline จาก PR #12258 ของ tolgacangoz)
|
| 5 |
"""
|
| 6 |
+
import os, tempfile, math, subprocess, glob
|
| 7 |
import torch
|
| 8 |
import gradio as gr
|
| 9 |
import spaces
|
|
|
|
| 41 |
return nh, nw
|
| 42 |
|
| 43 |
|
| 44 |
+
def download_audio_url(url):
    """Pull audio from a link and convert it to a 16 kHz mono MP3.

    The link is handed to yt-dlp (YouTube/TikTok/public web page/direct audio
    file), and the downloaded track is re-encoded with ffmpeg.

    Args:
        url: Link pasted by the user. ``None``, empty and whitespace-only
            values are rejected before any download work is attempted.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the converted MP3 on success
        and ``None`` on failure; ``message`` is the user-facing status text.
    """
    if not url or not url.strip():
        return None, "วางลิงก์ก่อน"
    url = url.strip()

    # Imported lazily, after the guard above, so an empty submission never
    # needs yt-dlp to be importable.
    import hashlib
    import yt_dlp

    # A content digest is stable across processes and practically
    # collision-free, unlike the salted builtin hash() reduced modulo 100000.
    key = hashlib.sha256(url.encode("utf-8")).hexdigest()[:16]
    final = f"/tmp/audio_{key}.mp3"

    # Each call works in its own scratch directory: concurrent requests cannot
    # clobber each other's raw files, and everything is removed on exit.
    with tempfile.TemporaryDirectory(prefix="araw_", dir="/tmp") as workdir:
        opts = {"outtmpl": os.path.join(workdir, "araw.%(ext)s"),
                "quiet": True, "no_warnings": True, "noplaylist": True,
                "overwrites": True, "ignoreerrors": True, "geo_bypass": True,
                "format": "bestaudio/best"}
        try:
            with yt_dlp.YoutubeDL(opts) as ydl:
                ydl.download([url])
        except Exception as e:  # UI boundary: report any failure as a status message
            return None, f"❌ โหลดไม่ได้: {str(e)[:120]}"

        # With "ignoreerrors" yt-dlp may fail without raising, so the presence
        # of a finished file is the real success signal. Partial-download
        # leftovers are skipped.
        dl = [p for p in sorted(glob.glob(os.path.join(workdir, "araw.*")))
              if not p.endswith((".part", ".ytdl"))]
        if not dl:
            return None, "❌ โหลดเสียงไม่สำเร็จ (ลิงก์อาจต้องล็อกอิน / โดนบล็อก)"

        converted = os.path.join(workdir, "audio.mp3")
        try:
            subprocess.run(["ffmpeg", "-y", "-loglevel", "error", "-i", dl[0],
                            "-vn", "-ar", "16000", "-ac", "1", "-c:a", "libmp3lame", converted],
                           timeout=300, check=True)
            # Anything this small is treated as a failed conversion.
            if not os.path.exists(converted) or os.path.getsize(converted) <= 500:
                return None, "❌ แปลงเสียงไม่สำเร็จ"
            # Publish atomically so a reader never sees a half-written mp3.
            os.replace(converted, final)
        except (subprocess.SubprocessError, OSError) as e:
            return None, f"❌ แปลงเสียงไม่สำเร็จ: {str(e)[:100]}"

    return final, f"✓ โหลดเสียงแล้ว ({os.path.getsize(final)//1024} KB) — กด Generate ได้เลย"
|
| 75 |
+
|
| 76 |
+
|
| 77 |
@spaces.GPU(duration=300)
|
| 78 |
def generate(image_path, audio_path, prompt, negative_prompt, steps, guidance, res_area, seed,
|
| 79 |
progress=gr.Progress(track_tqdm=True)):
|
|
|
|
| 115 |
with gr.Column():
|
| 116 |
image = gr.Image(type="filepath", label="ภาพ avatar (หน้าตรงชัด คน/การ์ตูน)")
|
| 117 |
audio = gr.Audio(type="filepath", label="ไฟล์เสียงพูด (อัปโหลด/อัดเสียง)")
|
| 118 |
+
with gr.Row(equal_height=True):
|
| 119 |
+
audio_url = gr.Textbox(placeholder="🔗 หรือวางลิงก์ YouTube/TikTok/เว็บ → ดึงเสียงอัตโนมัติ",
|
| 120 |
+
scale=4, container=False, lines=1, max_lines=1)
|
| 121 |
+
audio_url_btn = gr.Button("⬇️ ดึงเสียง", scale=1, min_width=110)
|
| 122 |
+
url_status = gr.Markdown("")
|
| 123 |
prompt = gr.Textbox(label="Prompt (บรรยายฉาก/ท่าทาง)",
|
| 124 |
value="a person talking to the camera, natural expression, slight head movement")
|
| 125 |
with gr.Accordion("⚙️ ตั้งค่าขั้นสูง", open=False):
|
|
|
|
| 132 |
with gr.Column():
|
| 133 |
out_video = gr.Video(label="ผลลัพธ์ (วิดีโอพูด)")
|
| 134 |
btn.click(generate, [image, audio, prompt, negative_prompt, steps, guidance, res_area, seed], out_video)
|
| 135 |
+
audio_url_btn.click(download_audio_url, inputs=audio_url, outputs=[audio, url_status])
|
| 136 |
|
| 137 |
if __name__ == "__main__":
|
| 138 |
demo.queue().launch()
|