ismailkattakath's picture
Fix: set api_name=False to suppress gradio_client schema TypeError on /api/info
18c57e9 verified
Raw
History Blame Contribute Delete
7.62 kB
"""
BFS — Best Face Swap Video · Hugging Face Space
"""
from __future__ import annotations
import os
import tempfile
import gradio as gr
import numpy as np
from PIL import Image
from composer import compose_frames, crop_reserved_region
from video_utils import (
compute_target_size,
extract_audio,
frames_for_duration,
load_video_frames,
resize_frames,
save_video,
)
# ---------------------------------------------------------------------------
# GPU decorator — no-op locally, activates the ZeroGPU grant on HF Spaces
# ---------------------------------------------------------------------------
try:
    import spaces
except ImportError:
    def GPU(fn=None, **kwargs):  # type: ignore
        """Stand-in decorator used when the ``spaces`` package is missing.

        Works both bare (``@GPU``) and parameterised (``@GPU(duration=...)``);
        in either form the decorated function is returned unchanged and any
        keyword options are ignored.
        """
        if fn is None:
            # Called with options only: hand back an identity decorator.
            return lambda f: f
        return fn
else:
    GPU = spaces.GPU
# ---------------------------------------------------------------------------
# Global model state (loaded once per worker)
# ---------------------------------------------------------------------------
# Cached result of pipeline.load_pipeline(); stays None until the first
# generate() call populates it, after which it is reused.
_pipeline_state: dict | None = None
# Passed as region_size_px to both compose_frames() and crop_reserved_region().
REGION_SIZE = 256
# Initial value of the "FPS" slider in the UI.
DEFAULT_FPS = 24.0
# Initial value of the "Duration (seconds)" slider in the UI.
DEFAULT_DURATION = 5.0
# Third argument to compute_target_size() when resizing the guide frames.
DEFAULT_RESOLUTION = 768
# ---------------------------------------------------------------------------
# Core processing function
# ---------------------------------------------------------------------------
@GPU(duration=300)
def generate(
    guide_video_path: str,
    face_image: Image.Image,
    prompt: str,
    duration: float,
    fps: float,
    lora_strength: float,
    seed: int,
    hf_token: str = "",
    progress: gr.Progress = gr.Progress(track_tqdm=True),
) -> tuple[str, str]:
    """
    Run the full head-swap pipeline on a guide video.

    Steps:
      1. Load + resize guide video frames
      2. Compose chroma face strip (ReservedRegionFrameComposer)
      3. Run LTX-2.3 diffusion
      4. Crop face strip from output
      5. Mux original audio back in

    Args:
        guide_video_path: Path of the uploaded guide video.
        face_image: Reference face as a PIL image.
        prompt: Text prompt forwarded to the diffusion step.
        duration: Requested clip length in seconds; determines the frame
            count together with ``fps`` and is passed on as the audio length.
        fps: Frame rate used for the frame count, inference and encoding.
        lora_strength: Forwarded unchanged to ``run_inference``.
        seed: Random seed; coerced to ``int`` before use.
        hf_token: Optional token, only consulted when the models are loaded
            for the first time in this worker.
        progress: Gradio progress tracker used for status updates.

    Returns:
        ``(output_video_path, status_message)``. When an input is missing or
        the guide video yields no frames, the path is ``""`` and the message
        explains what went wrong.
    """
    global _pipeline_state

    # ---- validate inputs early ----
    # Falsy checks also cover "" and None, which would otherwise crash on
    # .strip() or further down in the video loader.
    if not guide_video_path:
        return "", "Please upload a guide video."
    if face_image is None:
        return "", "Please upload a reference face image."
    if not prompt or not prompt.strip():
        return "", "Please enter a text prompt."

    # ---- lazy model load ----
    if _pipeline_state is None:
        from pipeline import load_pipeline

        progress(0, desc="Loading models (first run only — ~5 min)…")
        _pipeline_state = load_pipeline(
            token=(hf_token or "").strip() or None,
            progress_cb=lambda msg: progress(0, desc=msg),
        )

    progress(0.05, desc="Loading guide video…")
    # NOTE(review): the source frame rate is not used; frames are re-timed at
    # the requested `fps` while the audio keeps its own timing — confirm this
    # is intended.
    frames, _source_fps = load_video_frames(guide_video_path)
    # len() rather than truthiness: `frames` is an array (see .shape below).
    if len(frames) == 0:
        return "", "Could not read frames from the guide video."

    # tempfile.mktemp() is deprecated and race-prone. A private scratch
    # directory still hands the helpers a path that does not exist yet, and
    # the extracted audio is removed automatically when the block exits.
    with tempfile.TemporaryDirectory(prefix="bfs_audio_") as scratch_dir:
        # ---- extract audio before we do anything else ----
        audio_tmp = os.path.join(scratch_dir, "audio.wav")
        has_audio = extract_audio(guide_video_path, audio_tmp)

        # ---- resize frames ----
        progress(0.10, desc="Resizing frames…")
        orig_h, orig_w = frames.shape[1], frames.shape[2]
        target_w, target_h = compute_target_size(orig_w, orig_h, DEFAULT_RESOLUTION)
        frames = resize_frames(frames, target_w, target_h)

        # ---- trim / pad to requested duration ----
        n_frames = frames_for_duration(fps, duration)
        if len(frames) >= n_frames:
            frames = frames[:n_frames]
        else:
            # Hold the last frame for the remainder of the clip.
            pad = np.repeat(frames[-1:], n_frames - len(frames), axis=0)
            frames = np.concatenate([frames, pad], axis=0)

        # ---- compose chroma strip ----
        progress(0.15, desc="Compositing reference face strip…")
        composed = compose_frames(
            frames,
            face_image,
            region_position="left",
            region_size_px=REGION_SIZE,
        )

        # ---- run diffusion ----
        progress(0.20, desc="Running LTX-2.3 diffusion…")
        from pipeline import run_inference

        generated = run_inference(
            _pipeline_state,
            composed,
            prompt=prompt,
            fps=fps,
            lora_strength=lora_strength,
            seed=int(seed),
            progress_cb=lambda msg: progress(0.20, desc=msg),
        )

        # ---- crop face strip from output ----
        progress(0.90, desc="Cropping reserved region…")
        cropped = crop_reserved_region(
            generated,
            region_position="left",
            region_size_px=REGION_SIZE,
            output_size=(target_w, target_h),
        )

        # ---- save output video with audio ----
        progress(0.95, desc="Encoding output video…")
        # The result must outlive this call so the UI can read it, hence a
        # separate directory that is not cleaned up here.
        out_path = os.path.join(tempfile.mkdtemp(prefix="bfs_out_"), "output.mp4")
        # Assumes save_video() has finished reading audio_path by the time it
        # returns, since the scratch directory is deleted right after —
        # TODO confirm.
        save_video(
            cropped,
            fps=fps,
            output_path=out_path,
            audio_path=audio_tmp if has_audio else None,
            audio_duration=duration,
        )

    progress(1.0, desc="Done.")
    return out_path, "Generation complete."
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
# Markdown rendered at the top of the page via gr.Markdown(DESCRIPTION).
DESCRIPTION = """
# BFS — Best Face Swap Video
Swap the identity in any video using the **V3 persistent-template** technique.
The reference face is placed in a green chroma side-strip that persists across
all frames, giving the model continuous identity conditioning throughout generation.
**Prompt format:**
```
head_swap:
FACE: Female, fair skin, ~25 years old, long wavy auburn hair, green eyes…
ACTION: A person in a grey hoodie walks toward the camera indoors…
```
"""
# Example rows in the column order listed below. Currently empty and not
# referenced by the layout in the visible part of this file.
EXAMPLES: list[list] = [
# [guide_video, face_image, prompt, duration, fps, lora_strength, seed]
]
with gr.Blocks(title="BFS — Best Face Swap Video") as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Left column: inputs, tunable parameters and the trigger button.
        with gr.Column(scale=1):
            guide_video = gr.Video(label="Guide Video", sources=["upload"])
            face_image = gr.Image(
                label="Reference Face",
                type="pil",
                sources=["upload"],
            )
            prompt = gr.Textbox(
                label="Text Prompt",
                placeholder="head_swap:\nFACE: ...\nACTION: ...",
                lines=6,
            )
            # NOTE(review): the nesting of this layout was reconstructed from
            # flattened source; whether hf_token belongs inside the accordion
            # should be confirmed against the deployed Space.
            with gr.Accordion(label="Parameters", open=False):
                duration = gr.Slider(
                    minimum=1,
                    maximum=15,
                    value=DEFAULT_DURATION,
                    step=0.5,
                    label="Duration (seconds)",
                )
                fps = gr.Slider(
                    minimum=8,
                    maximum=30,
                    value=DEFAULT_FPS,
                    step=1.0,
                    label="FPS",
                )
                lora_strength = gr.Slider(
                    minimum=0.5,
                    maximum=1.5,
                    value=1.2,
                    step=0.05,
                    label="Face Swap Strength",
                )
                seed = gr.Number(value=42, label="Seed", precision=0)
                hf_token = gr.Textbox(
                    label="HF Token (optional)",
                    type="password",
                    placeholder="hf_… — only needed if the Space owner's token has no access to a gated model",
                )
            run_btn = gr.Button(value="Generate", variant="primary")

        # Right column: generated video and a status line.
        with gr.Column(scale=1):
            output_video = gr.Video(label="Result", interactive=False)
            status_text = gr.Textbox(label="Status", interactive=False)

    # Order must match the positional parameters of generate().
    generate_inputs = [
        guide_video,
        face_image,
        prompt,
        duration,
        fps,
        lora_strength,
        seed,
        hf_token,
    ]
    generate_outputs = [output_video, status_text]
    # api_name=False: do not expose this handler as a named API endpoint.
    run_btn.click(
        fn=generate,
        inputs=generate_inputs,
        outputs=generate_outputs,
        api_name=False,
    )

    # The footer text stays flush-left so the Markdown string is unchanged.
    gr.Markdown("""
---
**Hardware:** A100 80 GB GPU required.
**Model:** [Alissonerdx/BFS-Best-Face-Swap-Video](https://huggingface.co/Alissonerdx/BFS-Best-Face-Swap-Video) · Built on [LTX-2.3](https://huggingface.co/Lightricks/LTX-2.3)
**License:** For research and professional VFX use only. You must have explicit consent for any likeness you process.
""")

if __name__ == "__main__":
    demo.launch()