# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Hugging Face Spaces app for Depth Anything 3. The same codebase is intended to run on both: - ZeroGPU: GPU work is routed through @spaces.GPU. - Standard GPU hardware such as L40S: inference runs directly in the app process. No Space variable is required for normal use. HF-provided ZeroGPU runtime markers are preferred for auto-detection, and optional DA3_* variables remain for debugging and emergency overrides. """ from __future__ import annotations import os def _truthy_env_early(name: str) -> bool: value = os.environ.get(name) return value is not None and value.strip().lower() in {"1", "true", "yes", "on"} def _normalize_backend_early(value: str) -> str: aliases = { "zero": "zerogpu", "zero-gpu": "zerogpu", "zero_gpu": "zerogpu", "zgpu": "zerogpu", } value = value.strip().lower() return aliases.get(value, value) def _wants_zerogpu_early() -> bool: requested = _normalize_backend_early(os.environ.get("DA3_GPU_BACKEND", "auto")) if requested == "zerogpu": return True if requested in {"standard", "cpu"}: return False return any(_truthy_env_early(name) for name in ( "SPACES_ZERO_GPU", "ZEROGPU_V2", "SPACE_ZERO_GPU", "ZERO_GPU", "ZEROGPU", "HF_SPACE_ZERO_GPU", "SPACES_ZEROGPU", "SPACE_ZEROGPU", )) # ZeroGPU requires `spaces` to be imported before CUDA-related packages. # Keep this block before any local imports that may import torch/gsplat. 
_SPACES_MODULE = None
_SPACES_IMPORT_ERROR = None
if _wants_zerogpu_early():
    try:
        import spaces as _SPACES_MODULE  # noqa: E402

        @_SPACES_MODULE.GPU(duration=1)
        def _da3_zerogpu_startup_probe() -> str:
            """No-op function so ZeroGPU sees a decorated function at startup."""
            return "ok"

        print("[DA3] ZeroGPU startup probe registered (@spaces.GPU duration=1)")
    except Exception as exc:  # pragma: no cover - only occurs in hosted runtime edge cases
        _SPACES_IMPORT_ERROR = exc
        print(f"[DA3] Early ZeroGPU `spaces` import failed: {exc!r}")

from depth_anything_3.app.runtime_assets import maybe_sync_examples_on_startup
from depth_anything_3.app.runtime_config import configure_torch_runtime, detect_runtime_backend_decision

configure_torch_runtime()

from depth_anything_3.app.gradio_app import DepthAnything3App  # noqa: E402
from depth_anything_3.app.modules.model_inference import ModelInference  # noqa: E402

DEFAULT_MODEL_REPO = "depth-anything/DA3NESTED-GIANT-LARGE-1.1"


def _env_int(name: str, default: int) -> int:
    """Read integer environment variable ``name``, tolerating bad values.

    Returns ``default`` when the variable is unset, blank, or not parseable by
    ``int()``; a malformed value is reported on stdout instead of aborting
    startup with ``ValueError``.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw.strip())
    except ValueError:
        print(f"[DA3] Ignoring invalid {name}={raw!r}; using {default}")
        return default


def _resolve_spaces_module():
    """Return the ``spaces`` module for ZeroGPU, or ``None`` if it is unavailable.

    Prefers the module imported early at file load. If that early import was
    attempted and failed, the recorded error is reported and no retry is made.
    Otherwise (early import was never attempted) a late import is tried here.
    """
    if _SPACES_MODULE is not None:
        return _SPACES_MODULE
    if _SPACES_IMPORT_ERROR is not None:
        print(f"[DA3] ZeroGPU requested but early `spaces` import failed: {_SPACES_IMPORT_ERROR!r}")
        return None
    try:
        import spaces as spaces_mod
    except Exception as exc:
        print(f"[DA3] ZeroGPU requested but `spaces` import failed: {exc!r}")
        return None
    return spaces_mod


def _install_gpu_wrapper() -> str:
    """Install the correct inference wrapper for the selected hardware backend.

    Asks ``detect_runtime_backend_decision()`` for the backend, publishes the
    decision and the effective runtime details through ``DA3_RUNTIME_*``
    environment variables, and, for ZeroGPU, replaces
    ``ModelInference.run_inference`` with a ``@spaces.GPU``-decorated wrapper.

    Returns:
        The effective backend: ``"zerogpu"``, ``"cpu"`` or ``"standard"``.
        ``"standard"`` is also returned when ZeroGPU was selected but the
        ``spaces`` package could not be imported; in that case the standard
        runtime markers are still written so they match the returned value.
    """
    decision = detect_runtime_backend_decision()
    backend = decision.backend
    print(f"[DA3] Backend decision: {backend} ({decision.reason}; marker={decision.marker or 'none'})")
    os.environ["DA3_RUNTIME_BACKEND_DECISION"] = backend
    os.environ["DA3_RUNTIME_BACKEND_REASON"] = decision.reason
    os.environ["DA3_RUNTIME_BACKEND_MARKER"] = decision.marker or "none"

    default_duration = _env_int("DA3_ZEROGPU_DURATION", 120)
    min_duration = _env_int("DA3_ZEROGPU_DURATION_MIN", 30)
    max_duration = _env_int("DA3_ZEROGPU_DURATION_MAX", 120)
    size = os.environ.get("DA3_ZEROGPU_SIZE", "large")

    def _clamp_duration(value: int) -> int:
        # If min_duration > max_duration this expression yields min_duration.
        return max(min_duration, min(max_duration, int(value)))

    def _estimate_duration_from_args(*args, **kwargs) -> int:
        # `@spaces.GPU(duration=callable)` passes the decorated function inputs here.
        # Keep this estimator side-effect-free and independent of CUDA state.
        # NOTE(review): the positional indexes below (10, 4, 8) assume the argument
        # order of ModelInference.run_inference, which is not visible here - confirm.
        requested = kwargs.get("zerogpu_duration_s", None)
        if requested is None and len(args) >= 11:
            requested = args[10]
        try:
            if requested is not None and str(requested).strip().lower() not in {"", "auto", "auto (recommended)"}:
                return _clamp_duration(int(float(requested)))
        except Exception:
            # Deliberate best-effort: an unparseable request falls back to the
            # heuristics below, and the estimator must not raise or log.
            pass

        process_res_method = kwargs.get("process_res_method", None)
        infer_gs = bool(kwargs.get("infer_gs", False))
        if process_res_method is None and len(args) >= 5:
            process_res_method = args[4]
        if len(args) >= 9:
            infer_gs = bool(args[8])

        if infer_gs:
            return _clamp_duration(120)
        if str(process_res_method).lower() == "high_res":
            return _clamp_duration(120)
        return _clamp_duration(default_duration)

    original_run_inference = ModelInference.run_inference

    if backend == "zerogpu":
        spaces_mod = _resolve_spaces_module()
        if spaces_mod is not None:

            @spaces_mod.GPU(duration=_estimate_duration_from_args, size=size)
            def gpu_run_inference(self, *args, **kwargs):
                return original_run_inference(self, *args, **kwargs)

            ModelInference.run_inference = gpu_run_inference
            os.environ["DA3_RUNTIME_BACKEND_EFFECTIVE"] = "zerogpu"
            os.environ["DA3_RUNTIME_DEVICE_NAME"] = "RTX PRO 6000 Blackwell (allocated dynamically)"
            os.environ["DA3_RUNTIME_GPU_MODE"] = f"@spaces.GPU dynamic duration, size={size}"
            os.environ["DA3_RUNTIME_ZEROGPU_DEFAULT_DURATION"] = str(default_duration)
            os.environ["DA3_RUNTIME_ZEROGPU_MIN_DURATION"] = str(min_duration)
            os.environ["DA3_RUNTIME_ZEROGPU_MAX_DURATION"] = str(max_duration)
            print(
                "[DA3] Runtime backend: ZeroGPU "
                f"(@spaces.GPU dynamic duration, size={size}, "
                f"default={default_duration}, min={min_duration}, max={max_duration})"
            )
            return "zerogpu"

        # `spaces` is unavailable: continue into the standard setup below so the
        # DA3_RUNTIME_* markers agree with the "standard" return value.
        print("[DA3] Falling back to standard direct GPU/CPU inference.")

    if backend == "cpu":
        # An empty CUDA_VISIBLE_DEVICES hides CUDA devices from this process.
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        os.environ["DA3_RUNTIME_BACKEND_EFFECTIVE"] = "cpu"
        os.environ["DA3_RUNTIME_DEVICE_NAME"] = "CPU"
        os.environ["DA3_RUNTIME_GPU_MODE"] = "No CUDA GPU detected"
        print("[DA3] Runtime backend: CPU")
        return "cpu"

    device_name = "CUDA GPU"
    try:
        import torch

        if torch.cuda.is_available():
            device_name = torch.cuda.get_device_name(0)
    except Exception:
        # Best-effort probe: keep the generic device name if torch is missing
        # or the CUDA query fails.
        pass

    os.environ["DA3_RUNTIME_BACKEND_EFFECTIVE"] = "standard"
    os.environ["DA3_RUNTIME_DEVICE_NAME"] = device_name
    os.environ["DA3_RUNTIME_GPU_MODE"] = "Direct CUDA inference"
    print("[DA3] Runtime backend: standard direct GPU/CPU")
    return "standard"


def main() -> None:
    """Configure the runtime backend, build the Gradio app and launch it.

    Reads ``DA3_MODEL_DIR``, ``DA3_WORKSPACE_DIR``, ``DA3_GALLERY_DIR``,
    ``DA3_CACHE_EXAMPLES`` and ``DA3_CACHE_GS_TAG`` from the environment,
    creates the workspace and gallery directories, syncs example assets, and
    optionally pre-caches examples before serving on ``0.0.0.0:7860``.
    """
    runtime_backend = _install_gpu_wrapper()

    model_dir = os.environ.get("DA3_MODEL_DIR", DEFAULT_MODEL_REPO)
    workspace_dir = os.environ.get("DA3_WORKSPACE_DIR", "workspace/gradio")
    gallery_dir = os.environ.get("DA3_GALLERY_DIR", "workspace/gallery")

    os.makedirs(workspace_dir, exist_ok=True)
    os.makedirs(gallery_dir, exist_ok=True)

    synced, sync_message = maybe_sync_examples_on_startup(workspace_dir)
    print(f"[DA3] Example asset sync: {sync_message}")

    app = DepthAnything3App(
        model_dir=model_dir,
        workspace_dir=workspace_dir,
        gallery_dir=gallery_dir,
    )

    # Same truthy-flag parsing as the rest of this file; unset means disabled.
    cache_examples = _truthy_env_early("DA3_CACHE_EXAMPLES")
    cache_gs_tag = os.environ.get("DA3_CACHE_GS_TAG", "")

    print("🚀 Launching Depth Anything 3 on Hugging Face Spaces...")
    print(f"📦 Model Directory: {model_dir}")
    print(f"📁 Workspace Directory: {workspace_dir}")
    print(f"🖼️ Gallery Directory: {gallery_dir}")
    print(f"⚙️ Runtime Backend: {runtime_backend}")
    print(f"⬇️ Examples Synced This Startup: {synced}")
    print(f"💾 Cache Examples: {cache_examples}")

    if cache_examples:
        print("\n" + "=" * 60)
        print("Pre-caching mode enabled")
        print("This is disabled by default because it performs heavy inference at startup.")
        if cache_gs_tag:
            print(f"Scenes containing '{cache_gs_tag}' will use HIGH-RES + 3DGS")
        else:
            print("All scenes will use LOW-RES only")
        print("=" * 60)
        app.cache_examples(
            show_cam=True,
            filter_black_bg=False,
            filter_white_bg=False,
            save_percentage=5.0,
            num_max_points=1000,
            cache_gs_tag=cache_gs_tag,
            gs_trj_mode="smooth",
            gs_video_quality="low",
        )

    app.launch(host="0.0.0.0", port=7860, share=False)


if __name__ == "__main__":
    main()