# NVIDIA Cosmos Predict 2.5 Demo
# HuggingFace Space with ZeroGPU
# Gradio 5.9.0 compatible

import sys
import os
import time
import tempfile
import traceback

# ---- HOTFIX: Gradio /api_info boolean-schema crash ----
# Fix for TypeError: argument of type 'bool' is not iterable
# in gradio_client.utils.get_type() when JSON Schema has
# additionalProperties as boolean
try:
    import gradio_client.utils as gcu

    _orig_get_type = gcu.get_type

    def _patched_get_type(schema):
        """Return "boolean" for bare bool schemas, else defer to the original."""
        if isinstance(schema, bool):
            return "boolean"
        return _orig_get_type(schema)

    gcu.get_type = _patched_get_type
    print("[patch] gradio_client.utils.get_type bool-schema hotfix enabled", flush=True)
except Exception as e:
    # Best effort: the app must still start if the hotfix cannot be applied.
    print(f"[patch] skipped: {e}", flush=True)
# ---- END HOTFIX ----

# Ensure local cosmos_guardrail mock is found
_app_dir = os.path.dirname(os.path.abspath(__file__))
if _app_dir not in sys.path:
    sys.path.insert(0, _app_dir)

# Verify mock is available
try:
    from cosmos_guardrail import CosmosSafetyChecker as MockSafetyChecker
    MOCK_AVAILABLE = True
    print(f"cosmos_guardrail mock loaded from: {_app_dir}")
except ImportError as e:
    MOCK_AVAILABLE = False
    print(f"Warning: cosmos_guardrail mock not found: {e}")


def patch_diffusers_guardrail():
    """Patch diffusers to use our mock cosmos_guardrail.

    Marks cosmos_guardrail as available in ``diffusers.utils.import_utils``
    and installs the mock ``CosmosSafetyChecker`` on the
    ``pipeline_cosmos2_video2world`` module.

    Returns:
        True if every patch was applied; False if the mock is unavailable
        or any step raised (the failure is printed, never propagated).
    """
    if not MOCK_AVAILABLE:
        return False

    try:
        from cosmos_guardrail import CosmosSafetyChecker
        import diffusers.utils.import_utils as diffusers_import_utils

        diffusers_import_utils._cosmos_guardrail_available = True
        diffusers_import_utils._cosmos_guardrail_version = "0.1.0-mock"
        diffusers_import_utils.is_cosmos_guardrail_available = lambda: True

        import diffusers.pipelines.cosmos.pipeline_cosmos2_video2world as cosmos_pipeline

        cosmos_pipeline.is_cosmos_guardrail_available = lambda: True
        cosmos_pipeline.CosmosSafetyChecker = CosmosSafetyChecker

        print("Successfully patched diffusers to use mock cosmos_guardrail")
        return True
    except Exception as e:
        # Deliberate best effort: report and carry on without the patch.
        print(f"Warning: Could not patch diffusers: {e}")
        return False


import gradio as gr
import torch
from PIL import Image

# Import spaces for ZeroGPU
try:
    import spaces
    HAS_SPACES = True
except ImportError:
    spaces = None
    HAS_SPACES = False
    print("Warning: spaces module not available.")


def _gpu(duration):
    """Return the ``spaces.GPU`` decorator, or a no-op when ``spaces`` is missing.

    The ``spaces`` import above is optional, so decorating functions with
    ``@spaces.GPU`` directly would raise ``NameError`` at import time
    whenever the package is not installed.
    """
    if HAS_SPACES:
        return spaces.GPU(duration=duration)

    def _identity(fn):
        return fn

    return _identity


# Patch before importing pipeline
patch_diffusers_guardrail()

# Import Cosmos pipeline
Cosmos2_5_PredictBasePipeline = None
export_to_video = None
PIPELINE_AVAILABLE = False
PIPELINE_ERROR = None

try:
    from diffusers import Cosmos2_5_PredictBasePipeline
    from diffusers.utils import export_to_video
    PIPELINE_AVAILABLE = True
    print("Cosmos2_5_PredictBasePipeline loaded! (Predict 2.5)")
except ImportError as e:
    PIPELINE_ERROR = str(e)
    print(f"Warning: Could not import Cosmos 2.5 pipeline: {e}")

# Global pipeline
pipe = None


def get_pipeline():
    """Lazy load the Cosmos Predict 2.5 pipeline.

    The pipeline is created on first use and cached in the module-level
    ``pipe``; later calls return the cached instance.

    Returns:
        The loaded ``Cosmos2_5_PredictBasePipeline``.

    Raises:
        gr.Error: if the pipeline class could not be imported, if access to
            the model is denied (403 / gated), or if loading fails otherwise.
    """
    global pipe

    if not PIPELINE_AVAILABLE:
        raise gr.Error(f"Pipeline not available: {PIPELINE_ERROR}")

    if pipe is None:
        print("Loading Cosmos-Predict2.5-2B...")
        hf_token = os.environ.get("HF_TOKEN")
        if hf_token:
            # Only the length is logged, never the token itself.
            print(f"Using HF_TOKEN: length={len(hf_token)}")
        else:
            print("WARNING: No HF_TOKEN found!")

        try:
            pipe = Cosmos2_5_PredictBasePipeline.from_pretrained(
                "nvidia/Cosmos-Predict2.5-2B",
                revision="diffusers/base/post-trained",
                torch_dtype=torch.bfloat16,
                token=hf_token if hf_token else True,
            )
            print("Pipeline loaded!")
        except Exception as e:
            error_str = str(e)
            if "403" in error_str or "gated" in error_str.lower():
                raise gr.Error(
                    "MODEL ACCESS DENIED: Accept license at "
                    "https://huggingface.co/nvidia/Cosmos-Predict2.5-2B"
                ) from e
            raise gr.Error(f"Failed to load pipeline: {error_str}") from e

    return pipe


def _save_video(frames):
    """Export ``frames`` to a fresh temporary .mp4 file and return its path.

    The file is created up front with ``NamedTemporaryFile(delete=False)``
    instead of ``tempfile.mktemp``, which is deprecated because another
    process can claim the returned name before it is used.
    """
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        output_path = tmp.name
    export_to_video(frames, output_path, fps=16)
    return output_path


def check_environment():
    """Check environment (no GPU needed).

    Returns:
        A newline-joined report of Python / PyTorch / Gradio versions, CUDA
        and pipeline availability, plus GPU name and free/total VRAM when
        CUDA is available.
    """
    lines = [
        f"Python: {sys.version.split()[0]}",
        f"PyTorch: {torch.__version__}",
        f"Gradio: {gr.__version__}",
        f"CUDA available: {torch.cuda.is_available()}",
        f"Pipeline available: {PIPELINE_AVAILABLE}",
    ]
    if PIPELINE_ERROR:
        lines.append(f"Pipeline error: {PIPELINE_ERROR}")
    if torch.cuda.is_available():
        lines.append(f"GPU: {torch.cuda.get_device_name(0)}")
        free, total = torch.cuda.mem_get_info(0)
        lines.append(f"VRAM: {free/(1024**3):.1f}/{total/(1024**3):.1f} GB")
    return "\n".join(lines)


@_gpu(duration=30)
def simple_gpu_test():
    """Simple GPU test to check ZeroGPU quota.

    Runs a 100x100 matmul on the CUDA device.

    Returns:
        A newline-joined status report with the GPU name and the matmul sum.

    Raises:
        gr.Error: if any step fails; the message includes the traceback.
    """
    try:
        x = torch.randn(100, 100, device="cuda")
        y = torch.matmul(x, x)
        lines = [
            "Status: OK",
            f"GPU: {torch.cuda.get_device_name(0)}",
            f"Compute result: {float(y.sum().cpu()):.2f}",
        ]
        return "\n".join(lines)
    except Exception as e:
        raise gr.Error(f"GPU test failed: {str(e)}\n{traceback.format_exc()}") from e


@_gpu(duration=600)
def run_text2world(
    prompt: str,
    negative_prompt: str,
    num_frames: int,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
    seed: int
):
    """Run Cosmos Predict 2.5 Text2World inference.

    Args:
        prompt: Text description of the video to generate.
        negative_prompt: Text passed to the pipeline as ``negative_prompt``.
        num_frames: Number of frames to generate.
        height: Output height in pixels.
        width: Output width in pixels.
        num_inference_steps: Value passed as ``num_inference_steps``.
        guidance_scale: Value passed as ``guidance_scale``.
        seed: Seed for the CUDA random generator.

    Returns:
        A ``(video_path, log_text)`` tuple.

    Raises:
        gr.Error: on any failure. Errors that are already ``gr.Error``
            (e.g. from ``get_pipeline``) are re-raised unchanged.
    """
    try:
        start_time = time.time()
        log = ["=== TEXT2WORLD ==="]
        log.append(f"Prompt: {prompt[:50]}...")
        log.append(f"Resolution: {width}x{height}, Frames: {num_frames}")

        # Get pipeline
        log.append("Loading pipeline...")
        pipeline = get_pipeline()
        pipeline.to("cuda")
        log.append("Pipeline ready!")

        # Set seed
        generator = torch.Generator(device="cuda").manual_seed(int(seed))

        # Run inference
        log.append("Running inference...")
        output = pipeline(
            image=None,
            video=None,
            prompt=prompt,
            negative_prompt=negative_prompt,
            height=int(height),
            width=int(width),
            num_frames=int(num_frames),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=float(guidance_scale),
            generator=generator,
        )

        frames = output.frames[0]
        output_path = _save_video(frames)

        elapsed = time.time() - start_time
        log.append(f"Done in {elapsed:.1f}s, {len(frames)} frames")

        return output_path, "\n".join(log)
    except gr.Error:
        # Already a user-facing message; don't bury it under a traceback.
        raise
    except Exception as e:
        error_msg = f"ERROR: {str(e)}\n{traceback.format_exc()}"
        raise gr.Error(error_msg) from e


@_gpu(duration=600)
def run_image2world(
    image,
    prompt: str,
    negative_prompt: str,
    num_frames: int,
    num_inference_steps: int,
    guidance_scale: float,
    seed: int
):
    """Run Cosmos Predict 2.5 Image2World inference.

    Args:
        image: Conditioning image: a PIL image, a file path, or an array
            accepted by ``Image.fromarray``.
        prompt: Text description of the desired motion or scene.
        negative_prompt: Text passed to the pipeline as ``negative_prompt``.
        num_frames: Number of frames to generate.
        num_inference_steps: Value passed as ``num_inference_steps``.
        guidance_scale: Value passed as ``guidance_scale``.
        seed: Seed for the CUDA random generator.

    Returns:
        A ``(video_path, log_text)`` tuple.

    Raises:
        gr.Error: if no image was supplied, or on any failure. Errors that
            are already ``gr.Error`` are re-raised unchanged.
    """
    if image is None:
        raise gr.Error("Please upload an image")

    try:
        start_time = time.time()
        log = ["=== IMAGE2WORLD ==="]
        log.append(f"Prompt: {prompt[:50]}...")

        # Prepare image
        if isinstance(image, str):
            image = Image.open(image)
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Get dimensions (used for the log only; not passed to the pipeline)
        width, height = image.size
        log.append(f"Input: {width}x{height}, Frames: {num_frames}")

        # Get pipeline
        log.append("Loading pipeline...")
        pipeline = get_pipeline()
        pipeline.to("cuda")

        generator = torch.Generator(device="cuda").manual_seed(int(seed))

        # Run inference
        log.append("Running inference...")
        output = pipeline(
            image=image,
            video=None,
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_frames=int(num_frames),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=float(guidance_scale),
            generator=generator,
        )

        frames = output.frames[0]
        output_path = _save_video(frames)

        elapsed = time.time() - start_time
        log.append(f"Done in {elapsed:.1f}s, {len(frames)} frames")

        return output_path, "\n".join(log)
    except gr.Error:
        # Already a user-facing message; don't bury it under a traceback.
        raise
    except Exception as e:
        raise gr.Error(f"ERROR: {str(e)}\n{traceback.format_exc()}") from e


# Build UI
with gr.Blocks(title="Cosmos Predict 2.5 Demo") as demo:
    gr.Markdown("# NVIDIA Cosmos Predict 2.5 Demo")
    gr.Markdown("Generate video worlds from text or images using Cosmos-Predict2.5-2B")

    with gr.Tabs():
        # Text2World Tab
        with gr.TabItem("Text2World"):
            with gr.Row():
                with gr.Column():
                    t2w_prompt = gr.Textbox(
                        label="Prompt",
                        value="A futuristic city at sunset with flying cars",
                        lines=3
                    )
                    t2w_negative = gr.Textbox(
                        label="Negative Prompt",
                        value="low quality, blurry, distorted"
                    )
                    with gr.Row():
                        t2w_width = gr.Dropdown([704, 1280], value=704, label="Width")
                        t2w_height = gr.Dropdown([480, 704], value=480, label="Height")
                    with gr.Row():
                        # NOTE(review): 17 + 8k never lands on the maximum
                        # of 93 (it tops out at 89) - confirm intended range.
                        t2w_frames = gr.Slider(17, 93, step=8, value=33, label="Frames")
                        t2w_steps = gr.Slider(10, 50, step=5, value=25, label="Steps")
                    with gr.Row():
                        t2w_cfg = gr.Slider(1.0, 15.0, step=0.5, value=7.0, label="CFG")
                        t2w_seed = gr.Number(value=42, label="Seed", precision=0)
                    t2w_btn = gr.Button("Generate Video", variant="primary")
                with gr.Column():
                    t2w_video = gr.Video(label="Generated Video")
                    t2w_log = gr.Textbox(label="Log", lines=10)

            # Input order must match the run_text2world parameter order.
            t2w_btn.click(
                fn=run_text2world,
                inputs=[t2w_prompt, t2w_negative, t2w_frames, t2w_height,
                        t2w_width, t2w_steps, t2w_cfg, t2w_seed],
                outputs=[t2w_video, t2w_log],
                api_name="text2world"
            )

        # Image2World Tab
        with gr.TabItem("Image2World"):
            with gr.Row():
                with gr.Column():
                    i2w_image = gr.Image(label="Input Image", type="pil")
                    i2w_prompt = gr.Textbox(
                        label="Prompt",
                        value="The scene comes to life with motion",
                        lines=2
                    )
                    i2w_negative = gr.Textbox(
                        label="Negative Prompt",
                        value="static, frozen, low quality"
                    )
                    with gr.Row():
                        # NOTE(review): same slider range caveat as the
                        # Text2World tab (93 is not reachable with step=8).
                        i2w_frames = gr.Slider(17, 93, step=8, value=33, label="Frames")
                        i2w_steps = gr.Slider(10, 50, step=5, value=25, label="Steps")
                    with gr.Row():
                        i2w_cfg = gr.Slider(1.0, 15.0, step=0.5, value=7.0, label="CFG")
                        i2w_seed = gr.Number(value=42, label="Seed", precision=0)
                    i2w_btn = gr.Button("Animate Image", variant="primary")
                with gr.Column():
                    i2w_video = gr.Video(label="Generated Video")
                    i2w_log = gr.Textbox(label="Log", lines=10)

            # Input order must match the run_image2world parameter order.
            i2w_btn.click(
                fn=run_image2world,
                inputs=[i2w_image, i2w_prompt, i2w_negative, i2w_frames,
                        i2w_steps, i2w_cfg, i2w_seed],
                outputs=[i2w_video, i2w_log],
                api_name="image2world"
            )

    # Diagnostics
    with gr.Accordion("System Info", open=False):
        env_btn = gr.Button("Check Environment (CPU)")
        env_out = gr.Textbox(label="Environment", lines=8)
        env_btn.click(fn=check_environment, outputs=[env_out], api_name="check_env")

        gpu_test_btn = gr.Button("Test GPU (ZeroGPU)")
        gpu_test_out = gr.Textbox(label="GPU Test Result", lines=5)
        gpu_test_btn.click(fn=simple_gpu_test, outputs=[gpu_test_out], api_name="gpu_test")

# Launch only when run as a script, so importing this module does not
# start (and block on) the web server.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)