"""
Idle Video Generator — generates natural-looking idle animation clips using FLOAT.
Uses scripted pose control with dampening curves for realistic idle behavior.
Extracted from standalone script for integration into avatar creation pipeline.
"""
import os
import sys
import math
import random
import time
import logging
import torch
import cv2
import numpy as np

# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Directory of the FLOAT checkout. It is put at the front of sys.path (once)
# so that imports resolve against it first.
FLOAT_REPO_PATH = "/app/float_repo"
if sys.path.count(FLOAT_REPO_PATH) == 0:
    sys.path.insert(0, FLOAT_REPO_PATH)

# Generation settings
FPS = 25.0            # frames per second of every generated clip
DURATION_SEC = 5.0    # length of every generated clip, in seconds
SAMPLE_RATE = 16000   # samples per second of the silent audio fed to the model
NUM_CLIPS = 6         # default number of clips produced per avatar

# Clip mode definitions — each clip has different motion characteristics.
# Every entry describes a dampening sinusoid: it oscillates around `damp_mid`
# with amplitude `damp_amp`, i.e. within [damp_mid - damp_amp, damp_mid + damp_amp].
# Rows below are (damp_mid, damp_amp, label, number of consecutive clips):
#   clips 1-2: Wide Variable       (35%-65%)
#   clips 3-5: High Fixed          (55%-75%)
#   clip  6:   Very High Activity  (80%-90%)
CLIP_MODES = [
    {"damp_mid": mid, "damp_amp": amp, "label": label}
    for mid, amp, label, repeats in (
        (0.50, 0.15, "Wide Variable", 2),
        (0.65, 0.10, "High Fixed", 3),
        (0.85, 0.05, "Very High Activity", 1),
    )
    for _ in range(repeats)
]


class IdleVideoGenerator:
    """Generates idle animation clips using FLOAT model with scripted pose control.

    While :meth:`generate` runs, ``model.sample`` is temporarily replaced by a
    wrapper that multiplies its result by a per-frame dampening curve. The
    sampler captured at construction time is put back when generation ends,
    whether it succeeded or raised.
    """

    def __init__(self, float_model, device, input_size=512, lipsync_instance=None):
        """
        Args:
            float_model: Loaded FLOAT model instance (already on device, eval mode)
            device: torch device
            input_size: image size (default 512)
            lipsync_instance: FloatLipsync instance for reusing its face crop logic
        """
        self.model = float_model
        self.device = device
        self.input_size = input_size
        self.lipsync = lipsync_instance

        # Keep a handle on the unpatched sampler so generate() can wrap it and
        # later restore it.
        self._original_sample = self.model.sample

    def _load_reference_image(self, image_path):
        """Load and preprocess the reference image.

        With a lipsync instance available, its own preload routine is used so
        idle clips get the same crop/transform as lip-synced output. Otherwise
        the image is simply resized to ``input_size`` x ``input_size`` and
        normalized with mean 0.5 / std 0.5 per channel (no face crop).

        Args:
            image_path: Path of the reference image on disk.

        Returns:
            The preprocessed image tensor. In the fallback path it has a leading
            batch dimension of 1 and lives on ``self.device``.

        Raises:
            FileNotFoundError: Fallback path only, when OpenCV cannot read the file.
        """
        if self.lipsync:
            # NOTE(review): this call presumably replaces the lipsync instance's
            # cached `preprocessed_ref_image` — confirm that is acceptable for
            # other users of the same instance. We return a clone so later
            # changes on either side do not affect the other.
            self.lipsync._preload_reference_image(image_path)
            return self.lipsync.preprocessed_ref_image.clone()
        else:
            # Fallback: basic resize without face crop
            import albumentations as A
            import albumentations.pytorch.transforms as A_pytorch
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise FileNotFoundError(f"Could not read image: {image_path}")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            transform = A.Compose([
                A.Resize(height=self.input_size, width=self.input_size, interpolation=cv2.INTER_AREA),
                A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
                A_pytorch.ToTensorV2(),
            ])
            return transform(image=img)['image'].unsqueeze(0).to(self.device)

    @staticmethod
    def _build_motion_curves(mode, total_frames, fade_frames):
        """Build one clip's pose-control and dampening curves as CPU tensors.

        The curves are filled frame by frame on the CPU so that the caller can
        move them to the target device in a single transfer instead of issuing
        one tiny device write per scalar.

        Args:
            mode: Entry of ``CLIP_MODES`` providing ``damp_mid`` and ``damp_amp``.
            total_frames: Number of frames in the clip.
            fade_frames: Number of trailing frames over which pose control is
                faded towards zero.

        Returns:
            Tuple ``(pose_control, dampening)`` with shapes
            ``(1, total_frames, 20)`` and ``(1, total_frames, 1)``.
        """
        pose_control = torch.zeros(1, total_frames, 20, device="cpu")
        dampening = torch.zeros(1, total_frames, 1, device="cpu")

        # Randomize phases for variety (the draw order is kept stable so a
        # seeded `random` yields the same clips as before).
        pitch_phase = random.uniform(0, 2 * math.pi)
        yaw_phase = random.uniform(0, 2 * math.pi)
        mouth_phase = random.uniform(0, 2 * math.pi)
        blend_phase = random.uniform(0, 2 * math.pi)
        damp_phase = random.uniform(0, 2 * math.pi)

        fade_start = total_frames - fade_frames
        for t in range(total_frames):
            # Dampening oscillates within damp_mid +/- damp_amp.
            dampening[0, t, 0] = mode["damp_mid"] + mode["damp_amp"] * math.sin(t * 0.06 + damp_phase)

            # Subtle head sway on pose channels 1 and 2.
            pose_control[:, t, 1] = 0.08 * math.sin(t * 0.05 + pitch_phase)
            pose_control[:, t, 2] = 0.04 * math.cos(t * 0.03 + yaw_phase)

            # Mouth clamp: tension swings between 2.0 and 5.0 and is split
            # between channels 5 and 9 by a slowly varying blend in [0, 1].
            overall_tension = 3.5 + 1.5 * math.sin(t * 0.08 + mouth_phase)
            blend = (math.sin(t * 0.04 + blend_phase) + 1.0) / 2.0

            pose_control[:, t, 5] = overall_tension * blend
            pose_control[:, t, 9] = overall_tension * (1.0 - blend)

            # Linear fade-out over the last `fade_frames` frames; the multiplier
            # reaches exactly 0 on the final frame.
            if t >= fade_start:
                fade_multiplier = (total_frames - t - 1) / float(fade_frames)
                pose_control[:, t, :] *= fade_multiplier

        return pose_control, dampening

    def generate(self, ref_image_path, output_dir, avatar_name,
                 num_clips=NUM_CLIPS, progress_callback=None):
        """
        Generate idle video clips for an avatar.

        Args:
            ref_image_path: Path to reference face image
            output_dir: Directory to save clips (e.g. /app/avatars/Darwin/idlevideos/);
                created if it does not exist
            avatar_name: Name for file naming (e.g. "Darwin"); clips are written as
                "<avatar_name>_idle_<n>.mp4" with n starting at 1
            num_clips: Number of clips to generate (defaults to NUM_CLIPS). Clip
                modes are taken from CLIP_MODES cyclically, so a larger value
                repeats the mode sequence.
            progress_callback: Optional function(clip_index, total, status_text) for progress updates

        Returns:
            List of generated video file paths

        Raises:
            FileNotFoundError: If the reference image cannot be read (fallback loader).
            RuntimeError: If ffmpeg fails to encode a clip.
        """
        os.makedirs(output_dir, exist_ok=True)

        total_frames = int(DURATION_SEC * FPS)
        fade_frames = int(1.0 * FPS)  # 1-second fade out at the end of each clip

        # Load reference image
        if progress_callback:
            progress_callback(0, num_clips, "Loading reference image...")

        s = self._load_reference_image(ref_image_path)

        # Silent audio (idle = no speech)
        a = torch.zeros(1, int(DURATION_SEC * SAMPLE_RATE)).to(self.device)
        data = {'s': s, 'a': a}

        # Device-side dampening buffer read by the patched sampler. It is
        # refilled in place for every clip, so the closure always sees the
        # current clip's curve.
        dampening_curve = torch.zeros(1, total_frames, 1).to(self.device)

        original_sample = self._original_sample

        def dynamic_dampened_sample(*args, **kwargs):
            r_d = original_sample(*args, **kwargs)
            return r_d * dampening_curve

        # Monkey-patch the sample function
        self.model.sample = dynamic_dampened_sample

        generated_paths = []

        try:
            for clip_idx in range(num_clips):
                clip_num = clip_idx + 1
                mode = CLIP_MODES[clip_idx % len(CLIP_MODES)]

                status = f"Generating clip {clip_num}/{num_clips} ({mode['label']})"
                logger.info(f"[IDLE_GEN] {status}")
                if progress_callback:
                    progress_callback(clip_idx, num_clips, status)

                t0 = time.time()

                # Build both curves on the CPU, then move each to the device in
                # one transfer.
                pose_cpu, damp_cpu = self._build_motion_curves(mode, total_frames, fade_frames)
                dampening_curve.copy_(damp_cpu)

                # Run inference — pass pose_control via data['p']
                data['p'] = pose_cpu.to(self.device)
                with torch.no_grad():
                    out = self.model.inference(
                        data,
                        a_cfg_scale=1.0,
                        r_cfg_scale=1.0,
                        e_cfg_scale=1.0,
                        emo='S2E',
                        nfe=7,
                        seed=random.randint(1, 10000),
                    )

                # Map model output from [-1, 1] to uint8 [0, 255] and move the
                # channel axis last, which is the layout _save_video feeds to ffmpeg.
                d_hat = out['d_hat'].cpu().clamp(-1, 1)
                d_hat = ((d_hat + 1) / 2 * 255).to(torch.uint8)
                frames = d_hat.permute(0, 2, 3, 1).numpy()

                out_name = f"{avatar_name}_idle_{clip_num}.mp4"
                out_path = os.path.join(output_dir, out_name)
                self._save_video(frames, out_path)

                generated_paths.append(out_path)
                elapsed = time.time() - t0
                logger.info(f"[IDLE_GEN] ✓ {out_name} ({elapsed:.1f}s)")

                if progress_callback:
                    progress_callback(clip_idx + 1, num_clips, f"Clip {clip_num}/{num_clips} done ({elapsed:.0f}s)")

        finally:
            # Restore original sample function, even if a clip failed.
            self.model.sample = original_sample

        logger.info(f"[IDLE_GEN] ✓ All {len(generated_paths)} clips generated")
        return generated_paths

    def _save_video(self, frames_np, output_path):
        """Save numpy frames array to mp4 using ffmpeg.

        Args:
            frames_np: Frame array indexed as (frame, height, width, channel).
                It is streamed to ffmpeg as raw ``rgb24`` data, so it is
                expected to hold uint8 RGB values.
            output_path: Destination file; overwritten if it exists (``-y``).

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        import subprocess

        height, width = frames_np.shape[1], frames_np.shape[2]

        cmd = [
            'ffmpeg', '-y',
            '-f', 'rawvideo', '-vcodec', 'rawvideo',
            '-s', f'{width}x{height}', '-pix_fmt', 'rgb24',
            '-r', str(FPS),
            '-i', 'pipe:0',
            '-c:v', 'libx264', '-preset', 'fast', '-crf', '23',
            '-pix_fmt', 'yuv420p',
            output_path
        ]

        process = subprocess.Popen(
            cmd, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        _, stderr = process.communicate(input=frames_np.tobytes())

        if process.returncode != 0:
            # ffmpeg prints its version/configuration banner first and the
            # actual failure last, so log the tail of stderr. Undecodable bytes
            # are replaced so the log call itself cannot mask the real error.
            detail = stderr.decode(errors="replace")[-300:]
            logger.error(f"[IDLE_GEN] ffmpeg error: {detail}")
            raise RuntimeError("ffmpeg encoding failed")


# Lazily created, process-wide generator instance (see get_idle_generator).
_generator = None


def get_idle_generator():
    """Return the shared IdleVideoGenerator, building it on first use.

    The generator is wired to the model, device and input size of the lipsync
    object returned by ``float_lipsync.get_lipsync()``, so FLOAT must already
    be initialized when this is first called.

    Raises:
        RuntimeError: If the lipsync object reports that it is not ready.
    """
    global _generator

    # Fast path: already built on an earlier call.
    if _generator is not None:
        return _generator

    # Imported here rather than at module import time.
    from float_lipsync import get_lipsync

    lipsync = get_lipsync()
    if not lipsync.ready:
        raise RuntimeError("FLOAT not initialized — cannot generate idle videos")

    _generator = IdleVideoGenerator(
        float_model=lipsync.model,
        device=lipsync.device,
        input_size=lipsync.opt.input_size,
        lipsync_instance=lipsync,
    )
    return _generator