wan2-2-fp8da-aoti-preview

Running on Zero

App Files Files Community

mamungtai-sat committed 23 days ago

Commit

9720e9a

verified ·

1 Parent(s): 638b93b

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -1

app.py CHANGED Viewed

@@ -3,7 +3,7 @@
 Wan2.2-S2V Avatar — รูป + เสียง → วิดีโอคนพูด (lip-sync) บน ZeroGPU (H200)
 ใช้ diffusers fork (WanSpeechToVideoPipeline จาก PR #12258 ของ tolgacangoz)
 """
-import os, tempfile, math
 import torch
 import gradio as gr
 import spaces
@@ -41,6 +41,39 @@ def _snap_size(img, target_area=480 * 832, divisor=64, max_side=1280):
     return nh, nw
 @spaces.GPU(duration=300)
 def generate(image_path, audio_path, prompt, negative_prompt, steps, guidance, res_area, seed,
              progress=gr.Progress(track_tqdm=True)):
@@ -82,6 +115,11 @@ with gr.Blocks(title="Wan2.2-S2V Avatar", theme=gr.themes.Soft()) as demo:
         with gr.Column():
             image = gr.Image(type="filepath", label="ภาพ avatar (หน้าตรงชัด คน/การ์ตูน)")
             audio = gr.Audio(type="filepath", label="ไฟล์เสียงพูด (อัปโหลด/อัดเสียง)")
             prompt = gr.Textbox(label="Prompt (บรรยายฉาก/ท่าทาง)",
                                 value="a person talking to the camera, natural expression, slight head movement")
             with gr.Accordion("⚙️ ตั้งค่าขั้นสูง", open=False):
@@ -94,6 +132,7 @@ with gr.Blocks(title="Wan2.2-S2V Avatar", theme=gr.themes.Soft()) as demo:
         with gr.Column():
             out_video = gr.Video(label="ผลลัพธ์ (วิดีโอพูด)")
     btn.click(generate, [image, audio, prompt, negative_prompt, steps, guidance, res_area, seed], out_video)
 if __name__ == "__main__":
     demo.queue().launch()

 Wan2.2-S2V Avatar — รูป + เสียง → วิดีโอคนพูด (lip-sync) บน ZeroGPU (H200)
 ใช้ diffusers fork (WanSpeechToVideoPipeline จาก PR #12258 ของ tolgacangoz)
 """
+import os, tempfile, math, subprocess, glob
 import torch
 import gradio as gr
 import spaces
     return nh, nw
def download_audio_url(url):
    """Download the audio behind *url* and convert it to a 16 kHz mono MP3.

    The link is fetched with yt-dlp into a private scratch directory, then
    transcoded with ffmpeg. The scratch directory (and the raw download in
    it) is removed on every exit path; only the converted MP3 is kept.

    Args:
        url: Link pasted by the user. ``None``, empty, or whitespace-only
            input is rejected without touching the network.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the converted MP3 file, or
        ``None`` on failure; ``message`` is the status text for the UI.
    """
    if not url or not url.strip():
        return None, "วางลิงก์ก่อน"
    url = url.strip()

    # Imported lazily so the empty-input path needs neither module.
    import hashlib
    import yt_dlp

    # The built-in hash() is salted per process and "% 100000" lets unrelated
    # URLs share a file name; a SHA-256 prefix is stable and practically
    # collision-free, so one link can never overwrite another link's audio.
    key = hashlib.sha256(url.encode("utf-8", "replace")).hexdigest()[:16]
    final = f"/tmp/audio_{key}.mp3"

    # A fresh directory per call keeps concurrent requests apart and
    # guarantees the raw download is deleted once we are done with it.
    with tempfile.TemporaryDirectory(prefix=f"araw_{key}_", dir="/tmp") as workdir:
        opts = {
            "outtmpl": os.path.join(workdir, "raw.%(ext)s"),
            "quiet": True,
            "no_warnings": True,
            "noplaylist": True,
            "overwrites": True,
            "ignoreerrors": True,
            "geo_bypass": True,
            "format": "bestaudio/best",
        }
        # UI boundary: any failure inside yt-dlp is reported to the user
        # instead of crashing the click handler.
        try:
            with yt_dlp.YoutubeDL(opts) as ydl:
                ydl.download([url])
        except Exception as e:
            return None, f"❌ โหลดไม่ได้: {str(e)[:120]}"

        # With "ignoreerrors" set a failed download may not raise, so a
        # missing file is the failure signal. Leftover partial/state files
        # (presumably "*.part" / "*.ytdl" — TODO confirm against yt-dlp) are
        # not usable as ffmpeg input and are skipped.
        downloads = sorted(
            path
            for path in glob.glob(os.path.join(workdir, "raw.*"))
            if not path.endswith((".part", ".ytdl"))
        )
        if not downloads:
            return None, "❌ โหลดเสียงไม่สำเร็จ (ลิงก์อาจต้องล็อกอิน / โดนบล็อก)"

        # Convert inside the scratch directory first; the result is moved
        # into place only after it has been validated.
        converted = os.path.join(workdir, "converted.mp3")
        try:
            subprocess.run(
                ["ffmpeg", "-y", "-loglevel", "error", "-i", downloads[0],
                 "-vn", "-ar", "16000", "-ac", "1", "-c:a", "libmp3lame", converted],
                timeout=300, check=True,
            )
        except (subprocess.SubprocessError, OSError) as e:
            # SubprocessError covers a non-zero exit and the timeout;
            # OSError covers a missing ffmpeg binary.
            return None, f"❌ แปลงเสียงไม่สำเร็จ: {str(e)[:100]}"

        # Outputs of 500 bytes or less are treated as a failed conversion.
        if not os.path.exists(converted) or os.path.getsize(converted) <= 500:
            return None, "❌ แปลงเสียงไม่สำเร็จ"

        # os.replace swaps the file in one step, so a concurrent request for
        # the same URL never observes a half-written MP3.
        os.replace(converted, final)

    return final, f"✓ โหลดเสียงแล้ว ({os.path.getsize(final)//1024} KB) — กด Generate ได้เลย"
 @spaces.GPU(duration=300)
 def generate(image_path, audio_path, prompt, negative_prompt, steps, guidance, res_area, seed,
              progress=gr.Progress(track_tqdm=True)):
         with gr.Column():
             image = gr.Image(type="filepath", label="ภาพ avatar (หน้าตรงชัด คน/การ์ตูน)")
             audio = gr.Audio(type="filepath", label="ไฟล์เสียงพูด (อัปโหลด/อัดเสียง)")
+            with gr.Row(equal_height=True):
+                audio_url = gr.Textbox(placeholder="🔗 หรือวางลิงก์ YouTube/TikTok/เว็บ → ดึงเสียงอัตโนมัติ",
+                                       scale=4, container=False, lines=1, max_lines=1)
+                audio_url_btn = gr.Button("⬇️ ดึงเสียง", scale=1, min_width=110)
+            url_status = gr.Markdown("")
             prompt = gr.Textbox(label="Prompt (บรรยายฉาก/ท่าทาง)",
                                 value="a person talking to the camera, natural expression, slight head movement")
             with gr.Accordion("⚙️ ตั้งค่าขั้นสูง", open=False):
         with gr.Column():
             out_video = gr.Video(label="ผลลัพธ์ (วิดีโอพูด)")
     btn.click(generate, [image, audio, prompt, negative_prompt, steps, guidance, res_area, seed], out_video)
+    audio_url_btn.click(download_audio_url, inputs=audio_url, outputs=[audio, url_status])
 if __name__ == "__main__":
     demo.queue().launch()