| """ |
| Idle Video Generator — generates natural-looking idle animation clips using FLOAT. |
| Uses scripted pose control with dampening curves for realistic idle behavior. |
| Extracted from standalone script for integration into avatar creation pipeline. |
| """ |
| import os |
| import sys |
| import math |
| import random |
| import time |
| import logging |
| import torch |
| import cv2 |
| import numpy as np |
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Directory of the FLOAT repository. It is prepended to the import search
# path exactly once, so re-importing this module does not add duplicates.
FLOAT_REPO_PATH = "/app/float_repo"
if not sys.path.count(FLOAT_REPO_PATH):
    sys.path.insert(0, FLOAT_REPO_PATH)
|
|
| |
# Clip timing shared by every generated idle video.
FPS = 25.0  # frame rate used for frame counts and passed to ffmpeg via -r
DURATION_SEC = 5.0  # length of each clip, in seconds
SAMPLE_RATE = 16_000  # samples per second of the all-zero audio tensor fed to the model
NUM_CLIPS = 6  # default number of clips produced per avatar
|
|
| |
def _clip_mode(damp_mid, damp_amp, label):
    """Build one clip-mode entry (a fresh dict on every call)."""
    return {"damp_mid": damp_mid, "damp_amp": damp_amp, "label": label}


# Per-clip dampening presets, consumed in order (and cycled) by the generator.
# For each frame the dampening value is damp_mid + damp_amp * sin(...), so a
# preset oscillates within [damp_mid - damp_amp, damp_mid + damp_amp].
CLIP_MODES = [
    # 0.35 .. 0.65
    _clip_mode(0.50, 0.15, "Wide Variable"),
    _clip_mode(0.50, 0.15, "Wide Variable"),
    # 0.55 .. 0.75
    _clip_mode(0.65, 0.10, "High Fixed"),
    _clip_mode(0.65, 0.10, "High Fixed"),
    _clip_mode(0.65, 0.10, "High Fixed"),
    # 0.80 .. 0.90
    _clip_mode(0.85, 0.05, "Very High Activity"),
]
|
|
|
|
class IdleVideoGenerator:
    """Generates idle animation clips using FLOAT model with scripted pose control.

    For every clip the generator scripts a per-frame pose-control tensor and a
    per-frame dampening curve, temporarily replaces ``model.sample`` with a
    wrapper that scales the sampler output by that curve, runs
    ``model.inference`` on an all-zero audio tensor, and encodes the returned
    frames to an mp4 file with ffmpeg.
    """

    def __init__(self, float_model, device, input_size=512, lipsync_instance=None):
        """
        Args:
            float_model: Loaded FLOAT model instance (already on device, eval mode)
            device: torch device
            input_size: image size (default 512)
            lipsync_instance: FloatLipsync instance for reusing its face crop logic
        """
        self.model = float_model
        self.device = device
        self.input_size = input_size
        self.lipsync = lipsync_instance

        # Keep the model's own sampler so generate() can wrap it temporarily
        # and always put the original back afterwards.
        self._original_sample = self.model.sample

    def _load_reference_image(self, image_path):
        """Load and preprocess a reference image using float_lipsync's proven crop logic.

        When a lipsync instance was supplied, its ``_preload_reference_image``
        is called and a clone of its ``preprocessed_ref_image`` is returned.
        Otherwise the image is read with OpenCV, converted to RGB, resized to
        ``input_size`` x ``input_size``, normalized with mean/std 0.5 and
        returned as a batched tensor on ``self.device``.

        Args:
            image_path: Path to the reference image.

        Returns:
            The preprocessed reference image tensor.

        Raises:
            FileNotFoundError: In the fallback path, if OpenCV cannot read the image.
        """
        if self.lipsync:
            # NOTE(review): this goes through the lipsync instance's own
            # preload call, so it presumably updates that instance's cached
            # reference image as well - confirm this is acceptable for callers.
            self.lipsync._preload_reference_image(image_path)
            return self.lipsync.preprocessed_ref_image.clone()

        # Fallback: plain resize + normalize, without any face cropping.
        import albumentations as A
        import albumentations.pytorch.transforms as A_pytorch

        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        transform = A.Compose([
            A.Resize(height=self.input_size, width=self.input_size, interpolation=cv2.INTER_AREA),
            A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
            A_pytorch.ToTensorV2(),
        ])
        return transform(image=img)['image'].unsqueeze(0).to(self.device)

    def _script_clip_motion(self, mode, dampening_curve, total_frames, fade_frames):
        """Script one clip: fill ``dampening_curve`` in place, return the pose control.

        Args:
            mode: One entry of CLIP_MODES (reads "damp_mid" and "damp_amp").
            dampening_curve: (1, total_frames, 1) tensor overwritten frame by frame.
            total_frames: Number of frames in the clip.
            fade_frames: Number of trailing frames over which the pose control
                is scaled down linearly, reaching zero on the last frame.

        Returns:
            A new (1, total_frames, 20) pose-control tensor on ``self.device``.
        """
        pose_control = torch.zeros(1, total_frames, 20).to(self.device)

        # Random phase offsets so every clip moves differently. The draw
        # order is kept fixed so a seeded ``random`` reproduces the same clips.
        pitch_phase = random.uniform(0, 2 * math.pi)
        yaw_phase = random.uniform(0, 2 * math.pi)
        mouth_phase = random.uniform(0, 2 * math.pi)
        blend_phase = random.uniform(0, 2 * math.pi)
        damp_phase = random.uniform(0, 2 * math.pi)

        fade_start = total_frames - fade_frames
        for t in range(total_frames):
            # Slow oscillation around the mode's midpoint.
            dampening_curve[0, t, 0] = mode["damp_mid"] + mode["damp_amp"] * math.sin(t * 0.06 + damp_phase)

            # Channels 1 and 2: small slow sinusoids - presumably head
            # pitch / yaw given the phase names; confirm against FLOAT's
            # pose-channel layout.
            pose_control[:, t, 1] = 0.08 * math.sin(t * 0.05 + pitch_phase)
            pose_control[:, t, 2] = 0.04 * math.cos(t * 0.03 + yaw_phase)

            # Channels 5 and 9 share one oscillating magnitude (2.0 .. 5.0),
            # split between them by a blend factor that drifts in [0, 1].
            overall_tension = 3.5 + 1.5 * math.sin(t * 0.08 + mouth_phase)
            blend = (math.sin(t * 0.04 + blend_phase) + 1.0) / 2.0
            pose_control[:, t, 5] = overall_tension * blend
            pose_control[:, t, 9] = overall_tension * (1.0 - blend)

            # Linear fade of every channel over the trailing frames; the
            # multiplier is exactly 0 on the final frame.
            if t >= fade_start:
                fade_multiplier = (total_frames - t - 1) / float(fade_frames)
                pose_control[:, t, :] *= fade_multiplier

        return pose_control

    def generate(self, ref_image_path, output_dir, avatar_name,
                 num_clips=NUM_CLIPS, progress_callback=None):
        """
        Generate idle video clips for an avatar.

        Args:
            ref_image_path: Path to reference face image
            output_dir: Directory to save clips (e.g. /app/avatars/Darwin/idlevideos/);
                created if it does not exist
            avatar_name: Name for file naming (e.g. "Darwin"); clips are written as
                "<avatar_name>_idle_<n>.mp4" with n starting at 1
            num_clips: Number of clips to generate (default NUM_CLIPS). Modes are
                taken from CLIP_MODES in order and wrap around when num_clips
                exceeds its length.
            progress_callback: Optional function(clip_index, total, status_text) for progress updates

        Returns:
            List of generated video file paths

        Raises:
            FileNotFoundError: If the reference image cannot be read (fallback loader).
            RuntimeError: If ffmpeg exits with a non-zero status for any clip.
        """
        os.makedirs(output_dir, exist_ok=True)

        total_frames = int(DURATION_SEC * FPS)
        fade_frames = int(1.0 * FPS)  # fade the pose control over the last second

        if progress_callback:
            progress_callback(0, num_clips, "Loading reference image...")

        s = self._load_reference_image(ref_image_path)

        # All-zero audio tensor of the clip's duration.
        a = torch.zeros(1, int(DURATION_SEC * SAMPLE_RATE)).to(self.device)
        data = {'s': s, 'a': a}

        # One curve tensor shared by all clips; it is rewritten in place for
        # each clip and read by the wrapper below at sampling time.
        dampening_curve = torch.zeros(1, total_frames, 1).to(self.device)

        original_sample = self._original_sample

        def dynamic_dampened_sample(*args, **kwargs):
            # assumes the sampler output broadcasts against
            # (1, total_frames, 1) - TODO confirm against FLOAT's sample()
            r_d = original_sample(*args, **kwargs)
            return r_d * dampening_curve

        # Temporarily swap in the dampened sampler; restored in ``finally``.
        self.model.sample = dynamic_dampened_sample

        generated_paths = []

        try:
            for clip_idx in range(num_clips):
                clip_num = clip_idx + 1
                mode = CLIP_MODES[clip_idx % len(CLIP_MODES)]

                status = f"Generating clip {clip_num}/{num_clips} ({mode['label']})"
                logger.info("[IDLE_GEN] %s", status)
                if progress_callback:
                    progress_callback(clip_idx, num_clips, status)

                t0 = time.time()

                data['p'] = self._script_clip_motion(
                    mode, dampening_curve, total_frames, fade_frames
                )
                with torch.no_grad():
                    out = self.model.inference(
                        data,
                        a_cfg_scale=1.0,
                        r_cfg_scale=1.0,
                        e_cfg_scale=1.0,
                        emo='S2E',
                        nfe=7,
                        seed=random.randint(1, 10000),
                    )

                # Map model output from [-1, 1] to uint8 [0, 255] and move the
                # channel axis last, giving the layout the ffmpeg writer reads.
                d_hat = out['d_hat'].cpu().clamp(-1, 1)
                d_hat = ((d_hat + 1) / 2 * 255).to(torch.uint8)
                frames = d_hat.permute(0, 2, 3, 1).numpy()

                out_name = f"{avatar_name}_idle_{clip_num}.mp4"
                out_path = os.path.join(output_dir, out_name)
                self._save_video(frames, out_path)

                generated_paths.append(out_path)
                elapsed = time.time() - t0
                logger.info("[IDLE_GEN] ✓ %s (%.1fs)", out_name, elapsed)

                if progress_callback:
                    progress_callback(clip_idx + 1, num_clips, f"Clip {clip_num}/{num_clips} done ({elapsed:.0f}s)")
        finally:
            # Always restore the model's own sampler, even if a clip failed.
            self.model.sample = original_sample

        logger.info("[IDLE_GEN] ✓ All %d clips generated", len(generated_paths))
        return generated_paths

    def _save_video(self, frames_np, output_path):
        """Save numpy frames array to mp4 using ffmpeg.

        Args:
            frames_np: Frame array indexed as (frame, height, width, channel);
                its bytes are piped to ffmpeg as raw rgb24 video.
            output_path: Destination file; overwritten if it exists (-y).

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        import subprocess

        height, width = frames_np.shape[1], frames_np.shape[2]

        cmd = [
            'ffmpeg', '-y',
            '-f', 'rawvideo', '-vcodec', 'rawvideo',
            '-s', f'{width}x{height}', '-pix_fmt', 'rgb24',
            '-r', str(FPS),
            '-i', 'pipe:0',
            '-c:v', 'libx264', '-preset', 'fast', '-crf', '23',
            '-pix_fmt', 'yuv420p',
            output_path
        ]

        # stdout is not needed, so it is discarded instead of being buffered.
        result = subprocess.run(
            cmd,
            input=frames_np.tobytes(),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=False,
        )

        if result.returncode != 0:
            # ffmpeg prints its version/configuration banner first and the
            # actual failure last, so log the tail of stderr. Decode leniently
            # so undecodable bytes cannot mask the encoding error.
            detail = result.stderr.decode(errors="replace")[-300:]
            logger.error("[IDLE_GEN] ffmpeg error: %s", detail)
            raise RuntimeError("ffmpeg encoding failed")
|
|
|
|
| |
# Lazily created, process-wide IdleVideoGenerator (see get_idle_generator).
_generator = None


def get_idle_generator():
    """Get or create the idle video generator (requires FLOAT to be initialized first).

    The first successful call builds the generator from the shared lipsync
    instance (its model, device and configured input size) and caches it;
    later calls return the cached object.

    Returns:
        The shared IdleVideoGenerator instance.

    Raises:
        RuntimeError: If the lipsync instance reports it is not ready.
    """
    global _generator

    if _generator is not None:
        return _generator

    # Imported here rather than at module import time.
    from float_lipsync import get_lipsync

    engine = get_lipsync()
    if not engine.ready:
        raise RuntimeError("FLOAT not initialized — cannot generate idle videos")

    _generator = IdleVideoGenerator(
        float_model=engine.model,
        device=engine.device,
        input_size=engine.opt.input_size,
        lipsync_instance=engine,
    )
    return _generator