Wan2.2-AnimatePKK

Running on Zero

App Files Community

pormungtai committed on May 3

Commit

159bfa1

verified ·

1 Parent(s): dd5894d

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -173

app.py CHANGED Viewed

@@ -1,199 +1,158 @@
 import spaces
-import os, sys, subprocess, importlib, site
-from PIL import Image
-import cv2, gradio as gr, gc, numpy as np, tempfile
 from huggingface_hub import snapshot_download
-# — Clone Wan2.2 source code from GitHub (contains wan/ module) —
-WAN_REPO = "https://github.com/Wan-Video/Wan2.2.git"
-WAN_DIR = os.path.join(os.getcwd(), "Wan2.2")
-if not os.path.exists(WAN_DIR):
-    print("Cloning Wan2.2 from GitHub...")
-    subprocess.run(["git", "clone", "--depth", "1", WAN_REPO, WAN_DIR], check=True)
-    print("Clone complete.")
-# — Patch wan/modules/t5.py: calls torch.cuda.current_device() at class
-#   definition time which fails at startup (no GPU yet in ZeroGPU). —
-t5_path = os.path.join(WAN_DIR, "wan", "modules", "t5.py")
-if os.path.exists(t5_path):
-    with open(t5_path) as f:
-        t5_code = f.read()
-    if "torch.cuda.current_device()" in t5_code:
-        t5_code = t5_code.replace(
-            "device=torch.cuda.current_device(),",
-            "device=0,  # patched for ZeroGPU"
         )
         with open(t5_path, "w") as f:
-            f.write(t5_code)
-        print("Patched wan/modules/t5.py for ZeroGPU compatibility.")
-sys.path.insert(0, WAN_DIR)
-PREPROCESS_DIR = os.path.join(WAN_DIR, "wan", "modules", "animate", "preprocess")
-sys.path.append(PREPROCESS_DIR)
-for sitedir in site.getsitepackages():
-    site.addsitedir(sitedir)
-importlib.invalidate_caches()
-# — Download SAM2 weights (small, just files) —
-try:
-    snapshot_download(repo_id="alexnasa/sam2_C_cpu", local_dir=os.getcwd())
-    print("sam2 weights downloaded successfully.")
-except Exception as e:
-    print(f"Warning: sam2 download failed: {e}")
-# — Download Wan2.2-Animate-14B weights —
-# ZeroGPU free tier has 50GB storage limit. Full model is ~51.5GB.
-# We skip the T5 encoder (11.4GB) + tokenizer since the animate task
-# is video-to-video motion transfer and uses internal text conditioning.
-# DiT (~34.5GB) + CLIP (~4.8GB) + VAE (~0.5GB) ≈ 40GB → fits under 50GB.
-print("Downloading Wan2.2-Animate-14B model weights (DiT + CLIP + VAE)...")
-snapshot_download(
-    repo_id="Wan-AI/Wan2.2-Animate-14B",
-    local_dir="./Wan2.2-Animate-14B",
-    ignore_patterns=[
-        "models_t5_*",          # T5 text encoder (11.4GB) — skipped to fit 50GB
-        "google/*",             # umt5-xxl tokenizer files
-        "tokenizer*",
-        "special_tokens_map.json",
-    ]
-)
-print("Model weights downloaded.")
-# — Now safe to import wan (t5.py is patched) —
-import torch
-from generate import generate, load_model
-from preprocess_data import run as run_preprocess
-from preprocess_data import load_preprocess_models
-# — Lazy model init: load inside @spaces.GPU on first call —
 _wan_animate = None
 def get_wan_animate():
     global _wan_animate
     if _wan_animate is None:
-        print("Loading WanAnimate model (first call)...")
-        _wan_animate = load_model(True)
-        print("WanAnimate model loaded.")
     return _wan_animate
-def clip_and_set_fps(input_video_path, output_video_path, duration_s=3, target_fps=8):
-    cmd = [
-        "ffmpeg", "-nostdin", "-hide_banner", "-y",
-        "-i", input_video_path, "-t", str(duration_s),
-        "-vf", f"fps={target_fps}",
-        "-c:v", "libx264", "-pix_fmt", "yuv420p",
-        "-preset", "veryfast", "-crf", "18",
-        "-c:a", "aac", "-movflags", "+faststart",
-        output_video_path,
-    ]
-    subprocess.run(cmd, check=True, capture_output=True)
-def preprocess_video(path, duration):
-    out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-    clip_and_set_fps(path, out, duration_s=duration)
-    return out
-def is_portrait(video_file):
-    cap = cv2.VideoCapture(video_file)
-    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    cap.release()
-    return w < h
-@spaces.GPU(duration=500)
-def predict(ref_img, video, mode, quality, max_duration_s):
     try:
-        if ref_img is None or video is None:
-            return None, "Error: Please provide both Reference Image and Template Video."
-        wan_animate = get_wan_animate()
-        replace_flag = (mode == "wan2.2-animate-mix")
-        tag_string = "replace_flag" if replace_flag else "retarget_flag"
-        input_video = preprocess_video(video, int(max_duration_s))
-        w, h = (480, 832) if is_portrait(input_video) else (832, 480)
-        edited_frame_png = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
-        Image.open(ref_img).save(edited_frame_png)
-        output_video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-        tmpdir = tempfile.mkdtemp()
-        preprocess_model = load_preprocess_models(int(max_duration_s))
-        src_pose_path, src_face_path, src_bg_path, src_mask_path, src_ref_path = run_preprocess(
-            preprocess_model, input_video, edited_frame_png, tmpdir, w, h, tag_string, {}, {})
-        generate(wan_animate, src_pose_path, src_face_path, src_bg_path,
-                 src_mask_path, src_ref_path, output_video_path, replace_flag)
-        gc.collect()
-        torch.cuda.empty_cache()
-        return output_video_path, "SUCCEEDED - Video generated successfully!"
     except Exception as e:
-        return None, f"Error: {str(e)}"
-# ——— Vivek957 UI ———
-HEAD = """
-<div style="text-align:center; margin-bottom:10px">
-  <h1 style="font-size:2em; font-weight:700">Wan2.2 Animate (ZeroGPU)</h1>
-  <p>Motion Transfer · Free ZeroGPU A100</p>
-  <div style="display:flex; gap:8px; justify-content:center; margin-top:8px">
-    <a href="https://arxiv.org/abs/2503.20314" target="_blank">
-      <button style="padding:6px 14px; border-radius:6px; border:1px solid #aaa; cursor:pointer">📄 Paper</button>
-    </a>
-    <a href="https://github.com/Wan-Video/Wan2.2" target="_blank">
-      <button style="padding:6px 14px; border-radius:6px; border:1px solid #aaa; cursor:pointer">💻 GitHub</button>
-    </a>
-    <a href="https://huggingface.co/Wan-AI/Wan2.2-Animate-14B" target="_blank">
-      <button style="padding:6px 14px; border-radius:6px; border:1px solid #aaa; cursor:pointer">🤗 HF Model</button>
-    </a>
-    <a href="https://modelscope.cn/models/Wan-AI/Wan2.2-Animate-14B" target="_blank">
-      <button style="padding:6px 14px; border-radius:6px; border:1px solid #aaa; cursor:pointer">🔮 ModelScope</button>
-    </a>
-  </div>
-</div>
-"""
 with gr.Blocks(title="Wan2.2 Animate") as demo:
-    gr.HTML(HEAD)
-    with gr.Accordion("📖 Usage", open=False):
-        gr.Markdown("""
-**How to use:**
-1. Upload a **Reference Image** (the character/person you want to animate)
-2. Upload a **Template Video** (the motion source)
-3. Choose **Mode** and **Quality**
-4. Click **Generate Video**
-        """)
     with gr.Row():
         with gr.Column():
-            ref_img = gr.Image(label="Reference Image(参考图片)", type="filepath")
-            video = gr.Video(label="Template Video(模板视频)")
-            mode = gr.Dropdown(
-                label="&#25512;&#29702;&#27169;&#24335;(Inference Mode)",
-                choices=["wan2.2-animate", "wan2.2-animate-mix"],
-                value="wan2.2-animate"
-            )
-            quality = gr.Dropdown(label="&#25512;&#29702;&#36136;&#37327;(Inference Quality)",
-                choices=["wan-pro", "wan-std"], value="wan-pro")
-            max_dur = gr.Slider(label="Max Duration (sec)", minimum=1, maximum=5,
-                step=1, value=3)
-            run_button = gr.Button("Generate Video(&#29983;&#25104;&#35270;&#39057;)", variant="primary")
         with gr.Column():
-            output_video = gr.Video(label="Output Video(&#36755;&#20986;&#35270;&#39057;)")
-            output_status = gr.Textbox(label="Status(&#29366;&#24577;)", lines=5)
-    run_button.click(fn=predict,
-        inputs=[ref_img, video, mode, quality, max_dur],
-        outputs=[output_video, output_status])
-demo.queue(default_concurrency_limit=5)
-demo.launch(server_name="0.0.0.0", server_port=7860)

+import os
+import sys
+import subprocess
+import shutil
 import spaces
+import gradio as gr
+import torch
 from huggingface_hub import snapshot_download
+# ── Patch wan/modules/t5.py before importing wan ─────────────────────────────
+def clone_and_patch_wan():
+    if not os.path.exists("./Wan2.2"):
+        subprocess.run(
+            ["git", "clone", "https://github.com/Wan-Video/Wan2.2.git", "./Wan2.2"],
+            check=True
         )
+    t5_path = "./Wan2.2/wan/modules/t5.py"
+    with open(t5_path, "r") as f:
+        src = f.read()
+    if "device=torch.cuda.current_device()," in src:
+        src = src.replace("device=torch.cuda.current_device(),", "device=0,")
         with open(t5_path, "w") as f:
+            f.write(src)
+        print("[patch] t5.py patched: replaced current_device() with 0")
+clone_and_patch_wan()
+if "./Wan2.2" not in sys.path:
+    sys.path.insert(0, "./Wan2.2")
+# ── Download SAM2 CPU model ───────────────────────────────────────────────────
+if not os.path.exists("./process_checkpoint/sam2"):
+    snapshot_download(
+        repo_id="alexnasa/sam2_C_cpu",
+        local_dir="./process_checkpoint/sam2",
+    )
+    print("[init] SAM2 CPU model downloaded")
+# ── Download Wan2.2-Animate-14B (skip large unused files) ────────────────────
+if not os.path.exists("./Wan2.2-Animate-14B"):
+    snapshot_download(
+        repo_id="Wan-AI/Wan2.2-Animate-14B",
+        local_dir="./Wan2.2-Animate-14B",
+        ignore_patterns=[
+            "models_t5_*",
+            "google/*",
+            "tokenizer*",
+            "special_tokens_map.json",
+            "xlm-roberta-large/*",
+            "relighting_lora.ckpt",
+            "relighting_lora/*",
+            "process_checkpoint/sam2/*",
+        ]
+    )
+    print("[init] Wan2.2-Animate-14B downloaded")
+# ── Symlink SAM2 into model's expected path ───────────────────────────────────
+sam2_dst = "./Wan2.2-Animate-14B/process_checkpoint/sam2"
+sam2_src = "./process_checkpoint/sam2"
+if not os.path.exists(sam2_dst) and os.path.exists(sam2_src):
+    os.makedirs(os.path.dirname(sam2_dst), exist_ok=True)
+    os.symlink(os.path.abspath(sam2_src), sam2_dst)
+    print("[init] SAM2 symlink created")
+# ── Copy helper scripts ───────────────────────────────────────────────────────
+for fname in ["generate.py", "preprocess_data.py"]:
+    if os.path.exists(f"./{fname}") and not os.path.exists(f"./Wan2.2/{fname}"):
+        shutil.copy(f"./{fname}", f"./Wan2.2/{fname}")
+# ── Lazy model init ───────────────────────────────────────────────────────────
 _wan_animate = None
 def get_wan_animate():
     global _wan_animate
     if _wan_animate is None:
+        sys.path.insert(0, "./Wan2.2")
+        from generate import load_model
+        _wan_animate = load_model(False)
     return _wan_animate
+# ── Inference ─────────────────────────────────────────────────────────────────
+@spaces.GPU(duration=300)
+def run_animate(ref_image, template_video, mode, quality, max_duration):
+    import uuid
+    from generate import generate
+    wan_animate = get_wan_animate()
+    uid = str(uuid.uuid4())[:8]
+    work_dir = f"/tmp/wan_{uid}"
+    os.makedirs(work_dir, exist_ok=True)
     try:
+        ref_path  = os.path.join(work_dir, "ref.jpg")
+        tmpl_path = os.path.join(work_dir, "template.mp4")
+        import numpy as np
+        from PIL import Image
+        if isinstance(ref_image, np.ndarray):
+            Image.fromarray(ref_image).save(ref_path)
+        else:
+            shutil.copy(ref_image, ref_path)
+        shutil.copy(template_video, tmpl_path)
+        pose_path = os.path.join(work_dir, "pose.mp4")
+        face_path = os.path.join(work_dir, "face.png")
+        bg_path   = os.path.join(work_dir, "bg.png")
+        mask_path = os.path.join(work_dir, "mask.png")
+        from preprocess_data import preprocess
+        preprocess(
+            ref_image=ref_path,
+            template_video=tmpl_path,
+            output_pose=pose_path,
+            output_face=face_path,
+            output_bg=bg_path,
+            output_mask=mask_path,
+            mode=mode,
+        )
+        out_path = os.path.join(work_dir, "output.mp4")
+        generate(
+            wan_animate=wan_animate,
+            src_pose_path=pose_path,
+            src_face_path=face_path,
+            src_bg_path=bg_path,
+            src_mask_path=mask_path,
+            src_ref_path=ref_path,
+            save_file=out_path,
+        )
+        return out_path, "Done!"
     except Exception as e:
+        return None, f"Error: {e}"
+# ── UI ────────────────────────────────────────────────────────────────────────
 with gr.Blocks(title="Wan2.2 Animate") as demo:
+    gr.Markdown("## Wan2.2 Animate — ZeroGPU (Free A100)")
     with gr.Row():
         with gr.Column():
+            ref_image      = gr.Image(label="Reference Image", type="numpy")
+            template_video = gr.Video(label="Template Video")
+            mode           = gr.Dropdown(["normal", "tiktok"], value="normal", label="Mode")
+            quality        = gr.Dropdown(["standard", "high"], value="standard", label="Quality")
+            max_duration   = gr.Slider(1, 10, value=5, step=1, label="Max Duration (s)")
+            btn            = gr.Button("Generate", variant="primary")
         with gr.Column():
+            out_video = gr.Video(label="Output Video")
+            status    = gr.Textbox(label="Status", interactive=False)
+    btn.click(
+        run_animate,
+        inputs=[ref_image, template_video, mode, quality, max_duration],
+        outputs=[out_video, status],
+    )
+if __name__ == "__main__":
+    demo.launch()