| """ |
| BFS — Best Face Swap Video · Hugging Face Space |
| """ |
|
|
| from __future__ import annotations |
|
|
| import os |
| import tempfile |
|
|
| import gradio as gr |
| import numpy as np |
| from PIL import Image |
|
|
| from composer import compose_frames, crop_reserved_region |
| from video_utils import ( |
| compute_target_size, |
| extract_audio, |
| frames_for_duration, |
| load_video_frames, |
| resize_frames, |
| save_video, |
| ) |
|
|
| |
| |
| |
# ``spaces`` is an optional dependency: when it cannot be imported, ``GPU``
# degrades to a decorator that returns the wrapped function unchanged.
try:
    import spaces
    GPU = spaces.GPU
except ImportError:
    def GPU(fn=None, **kwargs):
        """No-op stand-in for ``spaces.GPU``.

        Supports both decorator spellings: bare ``@GPU`` (``fn`` is the
        decorated function and is returned untouched) and parameterised
        ``@GPU(duration=...)`` (``fn`` is ``None``; the keyword arguments are
        ignored and an identity decorator is returned).
        """
        if fn is None:
            return lambda f: f
        return fn
|
|
| |
| |
| |
# Cached result of ``load_pipeline``; remains ``None`` until the first call to
# ``generate`` populates it, after which it is reused.
_pipeline_state: dict | None = None

# Passed as ``region_size_px`` when composing and cropping the reserved region.
REGION_SIZE = 256

# Initial values of the FPS and duration sliders in the UI.
DEFAULT_FPS = 24.0
DEFAULT_DURATION = 5.0

# Third argument to ``compute_target_size`` when resizing guide frames.
DEFAULT_RESOLUTION = 768
|
|
| |
| |
| |
|
|
def _fit_frame_count(frames: np.ndarray, n_frames: int) -> np.ndarray:
    """Trim or pad ``frames`` along axis 0 so it holds ``n_frames`` entries.

    A clip that is already long enough is sliced to ``frames[:n_frames]``; a
    shorter clip is extended by repeating its final frame.
    """
    missing = n_frames - len(frames)
    if missing <= 0:
        return frames[:n_frames]
    tail = np.repeat(frames[-1:], missing, axis=0)
    return np.concatenate([frames, tail], axis=0)


@GPU(duration=300)
def generate(
    guide_video_path: str,
    face_image: Image.Image,
    prompt: str,
    duration: float,
    fps: float,
    lora_strength: float,
    seed: int,
    hf_token: str = "",
    progress: gr.Progress = gr.Progress(track_tqdm=True),
) -> tuple[str, str]:
    """
    Full head-swap pipeline:
      1. Load + resize guide video frames
      2. Compose chroma face strip (ReservedRegionFrameComposer)
      3. Run LTX-2.3 diffusion
      4. Crop face strip from output
      5. Mux original audio back in

    Args:
        guide_video_path: Path of the uploaded guide video.
        face_image: Reference face image handed to ``compose_frames``.
        prompt: Text prompt forwarded to ``run_inference``.
        duration: Requested clip length; with ``fps`` it determines the frame
            count via ``frames_for_duration`` and is passed to ``save_video``
            as ``audio_duration``.
        fps: Frame rate used for inference and for the encoded output.
        lora_strength: Forwarded to ``run_inference``.
        seed: Forwarded to ``run_inference`` after ``int()`` conversion.
        hf_token: Optional token; only used on the call that first loads the
            pipeline. Blank or ``None`` is passed on as ``None``.
        progress: Gradio progress reporter.

    Returns (output_video_path, status_message). When an input is missing or
    the guide video yields no frames, the path is ``""`` and the message
    explains what went wrong.
    """
    global _pipeline_state

    # --- Input validation -------------------------------------------------
    if guide_video_path is None:
        return "", "Please upload a guide video."
    if face_image is None:
        return "", "Please upload a reference face image."
    # Guard against None as well as blank text so validation never raises.
    if not prompt or not prompt.strip():
        return "", "Please enter a text prompt."

    # --- Lazy model loading (only the first call pays this cost) -----------
    if _pipeline_state is None:
        from pipeline import load_pipeline
        progress(0, desc="Loading models (first run only — ~5 min)…")
        _pipeline_state = load_pipeline(
            token=(hf_token or "").strip() or None,
            progress_cb=lambda msg: progress(0, desc=msg),
        )

    progress(0.05, desc="Loading guide video…")
    frames, _source_fps = load_video_frames(guide_video_path)
    if len(frames) == 0:
        return "", "Could not read frames from the guide video."

    # The extracted audio is only needed until it is muxed into the output, so
    # it lives in a private directory that is removed on every exit path.
    # A private directory also replaces the deprecated, race-prone
    # ``tempfile.mktemp`` while keeping the target path non-existent for the
    # writer.
    with tempfile.TemporaryDirectory(prefix="bfs_audio_") as scratch_dir:
        audio_tmp = os.path.join(scratch_dir, "audio.wav")
        has_audio = extract_audio(guide_video_path, audio_tmp)

        progress(0.10, desc="Resizing frames…")
        orig_h, orig_w = frames.shape[1], frames.shape[2]
        target_w, target_h = compute_target_size(orig_w, orig_h, DEFAULT_RESOLUTION)
        frames = resize_frames(frames, target_w, target_h)

        # Match the clip length to the requested duration.
        n_frames = frames_for_duration(fps, duration)
        frames = _fit_frame_count(frames, n_frames)

        progress(0.15, desc="Compositing reference face strip…")
        composed = compose_frames(
            frames,
            face_image,
            region_position="left",
            region_size_px=REGION_SIZE,
        )

        progress(0.20, desc="Running LTX-2.3 diffusion…")
        from pipeline import run_inference
        generated = run_inference(
            _pipeline_state,
            composed,
            prompt=prompt,
            fps=fps,
            lora_strength=lora_strength,
            seed=int(seed),
            progress_cb=lambda msg: progress(0.20, desc=msg),
        )

        progress(0.90, desc="Cropping reserved region…")
        cropped = crop_reserved_region(
            generated,
            region_position="left",
            region_size_px=REGION_SIZE,
            output_size=(target_w, target_h),
        )

        progress(0.95, desc="Encoding output video…")
        # The result must outlive this call so the UI can serve it, hence a
        # separate directory that is not cleaned up here.
        out_dir = tempfile.mkdtemp(prefix="bfs_out_")
        out_path = os.path.join(out_dir, "output.mp4")
        save_video(
            cropped,
            fps=fps,
            output_path=out_path,
            audio_path=audio_tmp if has_audio else None,
            audio_duration=duration,
        )

    progress(1.0, desc="Done.")
    return out_path, "Generation complete."
|
|
|
|
| |
| |
| |
|
|
# Markdown rendered at the top of the page.
DESCRIPTION = """
# BFS — Best Face Swap Video

Swap the identity in any video using the **V3 persistent-template** technique.
The reference face is placed in a green chroma side-strip that persists across
all frames, giving the model continuous identity conditioning throughout generation.

**Prompt format:**
```
head_swap:
FACE: Female, fair skin, ~25 years old, long wavy auburn hair, green eyes…
ACTION: A person in a grey hoodie walks toward the camera indoors…
```
"""

# Placeholder for example input rows; currently empty.
EXAMPLES: list[list] = []
|
|
# Page layout: a left column with the inputs and the Generate button, and a
# right column showing the resulting video and a status line.
with gr.Blocks(title="BFS — Best Face Swap Video") as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=1):
            guide_video = gr.Video(label="Guide Video", sources=["upload"])
            face_image = gr.Image(
                label="Reference Face",
                type="pil",
                sources=["upload"],
            )
            prompt = gr.Textbox(
                label="Text Prompt",
                placeholder="head_swap:\nFACE: ...\nACTION: ...",
                lines=6,
            )

            # Tunables are collapsed by default.
            with gr.Accordion("Parameters", open=False):
                duration = gr.Slider(
                    1,
                    15,
                    value=DEFAULT_DURATION,
                    step=0.5,
                    label="Duration (seconds)",
                )
                fps = gr.Slider(
                    8,
                    30,
                    value=DEFAULT_FPS,
                    step=1.0,
                    label="FPS",
                )
                lora_strength = gr.Slider(
                    0.5,
                    1.5,
                    value=1.2,
                    step=0.05,
                    label="Face Swap Strength",
                )
                seed = gr.Number(value=42, label="Seed", precision=0)
                hf_token = gr.Textbox(
                    label="HF Token (optional)",
                    type="password",
                    placeholder="hf_… — only needed if the Space owner's token has no access to a gated model",
                )

            run_btn = gr.Button("Generate", variant="primary")

        # Right column: outputs (read-only).
        with gr.Column(scale=1):
            output_video = gr.Video(label="Result", interactive=False)
            status_text = gr.Textbox(label="Status", interactive=False)

    # The input order must match the positional parameters of ``generate``.
    run_btn.click(
        fn=generate,
        inputs=[
            guide_video,
            face_image,
            prompt,
            duration,
            fps,
            lora_strength,
            seed,
            hf_token,
        ],
        outputs=[output_video, status_text],
        api_name=False,
    )

    gr.Markdown("""
---
**Hardware:** A100 80 GB GPU required.
**Model:** [Alissonerdx/BFS-Best-Face-Swap-Video](https://huggingface.co/Alissonerdx/BFS-Best-Face-Swap-Video) · Built on [LTX-2.3](https://huggingface.co/Lightricks/LTX-2.3)
**License:** For research and professional VFX use only. You must have explicit consent for any likeness you process.
""")


# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|