# NVIDIA Cosmos Predict 2.5 Demo
# HuggingFace Space with ZeroGPU
# Gradio 5.9.0 compatible

import sys
import os
import time
import tempfile
import traceback

# ---- HOTFIX: Gradio /api_info boolean-schema crash ----
# gradio_client.utils.get_type() fails with
#   TypeError: argument of type 'bool' is not iterable
# when the JSON Schema it receives is a bare boolean (for example a
# boolean additionalProperties). Wrap it so booleans are answered
# directly and everything else is forwarded to the original.
try:
    import gradio_client.utils as gcu

    _orig_get_type = gcu.get_type

    def _patched_get_type(schema):
        """Return "boolean" for a bool schema, else defer to the original."""
        return "boolean" if isinstance(schema, bool) else _orig_get_type(schema)

    gcu.get_type = _patched_get_type
    print("[patch] gradio_client.utils.get_type bool-schema hotfix enabled", flush=True)
except Exception as exc:
    # Best-effort: a missing or changed gradio_client must not stop startup.
    print(f"[patch] skipped: {exc}", flush=True)
# ---- END HOTFIX ----

# Put this file's directory at the front of sys.path so the local
# cosmos_guardrail mock can be imported.
_app_dir = os.path.dirname(os.path.abspath(__file__))
if _app_dir not in sys.path:
    sys.path.insert(0, _app_dir)

# Probe for the mock; MOCK_AVAILABLE records whether the import worked.
try:
    from cosmos_guardrail import CosmosSafetyChecker as MockSafetyChecker
except ImportError as import_err:
    MOCK_AVAILABLE = False
    print(f"Warning: cosmos_guardrail mock not found: {import_err}")
else:
    MOCK_AVAILABLE = True
    print(f"cosmos_guardrail mock loaded from: {_app_dir}")


def patch_diffusers_guardrail():
    """Redirect diffusers' cosmos_guardrail hooks to the local mock.

    Returns:
        True when every attribute was patched; False when the mock is not
        available or any import/assignment failed (the failure is printed).
    """
    if not MOCK_AVAILABLE:
        return False

    def _always_available():
        return True

    try:
        from cosmos_guardrail import CosmosSafetyChecker
        import diffusers.utils.import_utils as import_utils

        # Overwrite the module-level availability flag, version string and
        # availability function on diffusers' import utilities first.
        import_utils._cosmos_guardrail_available = True
        import_utils._cosmos_guardrail_version = "0.1.0-mock"
        import_utils.is_cosmos_guardrail_available = _always_available

        # Then import the video2world pipeline module and patch its own
        # references to the availability function and the checker class.
        import diffusers.pipelines.cosmos.pipeline_cosmos2_video2world as video2world
        video2world.is_cosmos_guardrail_available = _always_available
        video2world.CosmosSafetyChecker = CosmosSafetyChecker
    except Exception as exc:
        print(f"Warning: Could not patch diffusers: {exc}")
        return False

    print("Successfully patched diffusers to use mock cosmos_guardrail")
    return True


import gradio as gr
import torch
from PIL import Image

# Import spaces for ZeroGPU
try:
    import spaces
    HAS_SPACES = True
except ImportError:
    HAS_SPACES = False
    print("Warning: spaces module not available.")

    class _SpacesFallback:
        """No-op stand-in for the `spaces` package.

        The GPU entry points in this file are decorated with
        `@spaces.GPU(duration=...)`. Without this stand-in those decorators
        raise NameError when the package is missing, even though the
        ImportError above was deliberately tolerated.
        """

        @staticmethod
        def GPU(func=None, **_options):
            """Return the decorated function unchanged.

            Supports both the bare form (`@spaces.GPU`) and the configured
            form (`@spaces.GPU(duration=30)`); options are ignored.
            """
            if callable(func):
                return func
            return lambda wrapped: wrapped

    spaces = _SpacesFallback()

# Patch before importing pipeline
patch_diffusers_guardrail()

# Import Cosmos pipeline.
# The names are pre-bound to None/False so the rest of the module can test
# PIPELINE_AVAILABLE / PIPELINE_ERROR even when the import below fails.
Cosmos2_5_PredictBasePipeline = None
export_to_video = None
PIPELINE_AVAILABLE = False
PIPELINE_ERROR = None

try:
    from diffusers import Cosmos2_5_PredictBasePipeline
    from diffusers.utils import export_to_video
    PIPELINE_AVAILABLE = True
    print("Cosmos2_5_PredictBasePipeline loaded! (Predict 2.5)")
except (ImportError, RuntimeError) as e:
    # RuntimeError is caught as well as ImportError: diffusers resolves its
    # top-level names lazily and (to our understanding) re-raises a failure
    # inside the underlying submodule as RuntimeError. Either way the app
    # should start and surface the message through PIPELINE_ERROR.
    PIPELINE_ERROR = str(e)
    print(f"Warning: Could not import Cosmos 2.5 pipeline: {e}")

# Global pipeline (populated lazily by get_pipeline)
pipe = None


def get_pipeline():
    """Return the shared Cosmos Predict 2.5 pipeline, loading it on first use.

    The loaded pipeline is cached in the module-level ``pipe`` so later
    calls return it immediately.

    Raises:
        gr.Error: if the pipeline class could not be imported, or if
            ``from_pretrained`` fails. A dedicated access-denied message is
            used when the error text contains "403" or "gated".
    """
    global pipe

    if not PIPELINE_AVAILABLE:
        raise gr.Error(f"Pipeline not available: {PIPELINE_ERROR}")

    # Fast path: an earlier call already loaded the pipeline.
    if pipe is not None:
        return pipe

    print("Loading Cosmos-Predict2.5-2B...")
    hf_token = os.environ.get("HF_TOKEN")
    # Only the token length is printed, never the token itself.
    token_note = (
        f"Using HF_TOKEN: length={len(hf_token)}"
        if hf_token
        else "WARNING: No HF_TOKEN found!"
    )
    print(token_note)

    try:
        pipe = Cosmos2_5_PredictBasePipeline.from_pretrained(
            "nvidia/Cosmos-Predict2.5-2B",
            revision="diffusers/base/post-trained",
            torch_dtype=torch.bfloat16,
            # An unset or empty HF_TOKEN falls back to token=True.
            token=hf_token or True,
        )
        print("Pipeline loaded!")
    except Exception as exc:
        details = str(exc)
        access_denied = "403" in details or "gated" in details.lower()
        if access_denied:
            raise gr.Error(
                "MODEL ACCESS DENIED: Accept license at "
                "https://huggingface.co/nvidia/Cosmos-Predict2.5-2B"
            )
        raise gr.Error(f"Failed to load pipeline: {details}")
    return pipe


def check_environment():
    """Summarize the runtime environment as newline-separated text (no GPU needed).

    Reports the Python, PyTorch and Gradio versions, whether CUDA is
    reported as available, and whether the pipeline import succeeded (plus
    its error text, if any). When CUDA is reported as available, the device
    name and free/total VRAM are added.

    Returns:
        str: one "label: value" entry per line.
    """
    cuda_available = torch.cuda.is_available()
    lines = [
        f"Python: {sys.version.split()[0]}",
        f"PyTorch: {torch.__version__}",
        f"Gradio: {gr.__version__}",
        f"CUDA available: {cuda_available}",
        f"Pipeline available: {PIPELINE_AVAILABLE}",
    ]
    if PIPELINE_ERROR:
        lines.append(f"Pipeline error: {PIPELINE_ERROR}")
    if cuda_available:
        # This is a diagnostic: a failing device query should appear in the
        # report rather than abort it. NOTE(review): presumably relevant on
        # ZeroGPU, where this function runs outside a @spaces.GPU call.
        try:
            lines.append(f"GPU: {torch.cuda.get_device_name(0)}")
            free, total = torch.cuda.mem_get_info(0)
            lines.append(f"VRAM: {free/(1024**3):.1f}/{total/(1024**3):.1f} GB")
        except Exception as exc:
            lines.append(f"GPU query failed: {exc}")
    return "\n".join(lines)


@spaces.GPU(duration=30)
def simple_gpu_test():
    """Run a small CUDA matmul and report the device name and result sum.

    Returns:
        str: three lines - status, GPU name, and the summed matmul result.

    Raises:
        gr.Error: with the message and traceback if any step fails.
    """
    try:
        sample = torch.randn(100, 100, device="cuda")
        product = torch.matmul(sample, sample)
        device_name = torch.cuda.get_device_name(0)
        total = float(product.sum().cpu())
        return f"Status: OK\nGPU: {device_name}\nCompute result: {total:.2f}"
    except Exception as exc:
        raise gr.Error(f"GPU test failed: {str(exc)}\n{traceback.format_exc()}")


@spaces.GPU(duration=600)
def run_text2world(
    prompt: str,
    negative_prompt: str,
    num_frames: int,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
    seed: int
):
    """Run Cosmos Predict 2.5 Text2World inference.

    Args:
        prompt: Text description of the video to generate.
        negative_prompt: Text describing what to avoid.
        num_frames: Number of frames to generate (cast to int).
        height: Output height in pixels (cast to int).
        width: Output width in pixels (cast to int).
        num_inference_steps: Number of inference steps (cast to int).
        guidance_scale: Guidance scale passed to the pipeline (cast to float).
        seed: Seed for the CUDA random generator (cast to int).

    Returns:
        tuple[str, str]: path of the exported .mp4 and the run log text.

    Raises:
        gr.Error: if the pipeline cannot be loaded or inference/export fails.
    """
    try:
        start_time = time.time()
        log = ["=== TEXT2WORLD ==="]
        log.append(f"Prompt: {prompt[:50]}...")
        log.append(f"Resolution: {width}x{height}, Frames: {num_frames}")

        # Get pipeline
        log.append("Loading pipeline...")
        pipeline = get_pipeline()
        pipeline.to("cuda")
        log.append("Pipeline ready!")

        # Set seed
        generator = torch.Generator(device="cuda").manual_seed(int(seed))

        # Run inference
        log.append("Running inference...")
        output = pipeline(
            image=None,
            video=None,
            prompt=prompt,
            negative_prompt=negative_prompt,
            height=int(height),
            width=int(width),
            num_frames=int(num_frames),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=float(guidance_scale),
            generator=generator
        )

        frames = output.frames[0]
        # tempfile.mktemp is deprecated: it only returns a name, so another
        # process can claim the path first. NamedTemporaryFile creates the
        # file atomically; delete=False keeps it on disk after the handle is
        # closed so the video can be written and returned by path.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            output_path = tmp.name
        export_to_video(frames, output_path, fps=16)

        elapsed = time.time() - start_time
        log.append(f"Done in {elapsed:.1f}s, {len(frames)} frames")
        return output_path, "\n".join(log)

    except gr.Error:
        # Already a user-facing message (e.g. from get_pipeline); do not
        # bury it inside a second error with a traceback.
        raise
    except Exception as e:
        error_msg = f"ERROR: {str(e)}\n{traceback.format_exc()}"
        raise gr.Error(error_msg) from e


@spaces.GPU(duration=600)
def run_image2world(
    image,
    prompt: str,
    negative_prompt: str,
    num_frames: int,
    num_inference_steps: int,
    guidance_scale: float,
    seed: int
):
    """Run Cosmos Predict 2.5 Image2World inference.

    Args:
        image: Conditioning image. A file path is opened with PIL; any other
            non-PIL value is passed through ``Image.fromarray``.
        prompt: Text description of the desired motion/scene.
        negative_prompt: Text describing what to avoid.
        num_frames: Number of frames to generate (cast to int).
        num_inference_steps: Number of inference steps (cast to int).
        guidance_scale: Guidance scale passed to the pipeline (cast to float).
        seed: Seed for the CUDA random generator (cast to int).

    Returns:
        tuple[str, str]: path of the exported .mp4 and the run log text.

    Raises:
        gr.Error: if no image was supplied, the pipeline cannot be loaded,
            or inference/export fails.
    """
    if image is None:
        raise gr.Error("Please upload an image")

    try:
        start_time = time.time()
        log = ["=== IMAGE2WORLD ==="]
        log.append(f"Prompt: {prompt[:50]}...")

        # Prepare image
        if isinstance(image, str):
            image = Image.open(image)
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Get dimensions (used for the log only; not passed to the pipeline)
        width, height = image.size
        log.append(f"Input: {width}x{height}, Frames: {num_frames}")

        # Get pipeline
        log.append("Loading pipeline...")
        pipeline = get_pipeline()
        pipeline.to("cuda")

        generator = torch.Generator(device="cuda").manual_seed(int(seed))

        # Run inference
        log.append("Running inference...")
        output = pipeline(
            image=image,
            video=None,
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_frames=int(num_frames),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=float(guidance_scale),
            generator=generator
        )

        frames = output.frames[0]
        # tempfile.mktemp is deprecated: it only returns a name, so another
        # process can claim the path first. NamedTemporaryFile creates the
        # file atomically; delete=False keeps it on disk after the handle is
        # closed so the video can be written and returned by path.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            output_path = tmp.name
        export_to_video(frames, output_path, fps=16)

        elapsed = time.time() - start_time
        log.append(f"Done in {elapsed:.1f}s, {len(frames)} frames")
        return output_path, "\n".join(log)

    except gr.Error:
        # Already a user-facing message (e.g. from get_pipeline); do not
        # bury it inside a second error with a traceback.
        raise
    except Exception as e:
        raise gr.Error(f"ERROR: {str(e)}\n{traceback.format_exc()}") from e


# Build UI
# Layout: a title, two generation tabs (Text2World, Image2World), and a
# collapsed "System Info" accordion with two diagnostic buttons. Components
# are created inside nested context managers, so statement order and
# nesting determine where each one appears.
with gr.Blocks(title="Cosmos Predict 2.5 Demo") as demo:
    gr.Markdown("# NVIDIA Cosmos Predict 2.5 Demo")
    gr.Markdown("Generate video worlds from text or images using Cosmos-Predict2.5-2B")

    with gr.Tabs():
        # Text2World Tab
        with gr.TabItem("Text2World"):
            with gr.Row():
                # Left column: generation inputs.
                with gr.Column():
                    t2w_prompt = gr.Textbox(
                        label="Prompt",
                        value="A futuristic city at sunset with flying cars",
                        lines=3
                    )
                    t2w_negative = gr.Textbox(
                        label="Negative Prompt",
                        value="low quality, blurry, distorted"
                    )
                    with gr.Row():
                        t2w_width = gr.Dropdown([704, 1280], value=704, label="Width")
                        t2w_height = gr.Dropdown([480, 704], value=480, label="Height")
                    with gr.Row():
                        t2w_frames = gr.Slider(17, 93, step=8, value=33, label="Frames")
                        t2w_steps = gr.Slider(10, 50, step=5, value=25, label="Steps")
                    with gr.Row():
                        t2w_cfg = gr.Slider(1.0, 15.0, step=0.5, value=7.0, label="CFG")
                        t2w_seed = gr.Number(value=42, label="Seed", precision=0)
                    t2w_btn = gr.Button("Generate Video", variant="primary")

                # Right column: generated video and run log.
                with gr.Column():
                    t2w_video = gr.Video(label="Generated Video")
                    t2w_log = gr.Textbox(label="Log", lines=10)

            # Inputs are listed in run_text2world's positional parameter
            # order: prompt, negative_prompt, num_frames, height, width,
            # num_inference_steps, guidance_scale, seed.
            t2w_btn.click(
                fn=run_text2world,
                inputs=[t2w_prompt, t2w_negative, t2w_frames, t2w_height,
                        t2w_width, t2w_steps, t2w_cfg, t2w_seed],
                outputs=[t2w_video, t2w_log],
                api_name="text2world"
            )

        # Image2World Tab
        with gr.TabItem("Image2World"):
            with gr.Row():
                # Left column: conditioning image and generation inputs.
                with gr.Column():
                    i2w_image = gr.Image(label="Input Image", type="pil")
                    i2w_prompt = gr.Textbox(
                        label="Prompt",
                        value="The scene comes to life with motion",
                        lines=2
                    )
                    i2w_negative = gr.Textbox(
                        label="Negative Prompt",
                        value="static, frozen, low quality"
                    )
                    with gr.Row():
                        i2w_frames = gr.Slider(17, 93, step=8, value=33, label="Frames")
                        i2w_steps = gr.Slider(10, 50, step=5, value=25, label="Steps")
                    with gr.Row():
                        i2w_cfg = gr.Slider(1.0, 15.0, step=0.5, value=7.0, label="CFG")
                        i2w_seed = gr.Number(value=42, label="Seed", precision=0)
                    i2w_btn = gr.Button("Animate Image", variant="primary")

                # Right column: generated video and run log.
                with gr.Column():
                    i2w_video = gr.Video(label="Generated Video")
                    i2w_log = gr.Textbox(label="Log", lines=10)

            # Inputs are listed in run_image2world's positional parameter
            # order: image, prompt, negative_prompt, num_frames,
            # num_inference_steps, guidance_scale, seed.
            i2w_btn.click(
                fn=run_image2world,
                inputs=[i2w_image, i2w_prompt, i2w_negative, i2w_frames,
                        i2w_steps, i2w_cfg, i2w_seed],
                outputs=[i2w_video, i2w_log],
                api_name="image2world"
            )

    # Diagnostics
    with gr.Accordion("System Info", open=False):
        # check_environment takes no inputs and is not wrapped in @spaces.GPU.
        env_btn = gr.Button("Check Environment (CPU)")
        env_out = gr.Textbox(label="Environment", lines=8)
        env_btn.click(fn=check_environment, outputs=[env_out], api_name="check_env")

        # simple_gpu_test is wrapped in @spaces.GPU(duration=30).
        gpu_test_btn = gr.Button("Test GPU (ZeroGPU)")
        gpu_test_out = gr.Textbox(label="GPU Test Result", lines=5)
        gpu_test_btn.click(fn=simple_gpu_test, outputs=[gpu_test_out], api_name="gpu_test")

# Start the server at import/run time, listening on all interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)