# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Hugging Face Spaces app for Depth Anything 3.

The same codebase is intended to run on both:
- ZeroGPU: GPU work is routed through @spaces.GPU.
- Standard GPU hardware such as L40S: inference runs directly in the app process.

No Space variable is required for normal use. HF-provided ZeroGPU runtime
markers are preferred for auto-detection, and optional DA3_* variables remain
for debugging and emergency overrides.
"""

from __future__ import annotations

import os


def _truthy_env_early(name: str) -> bool:
    value = os.environ.get(name)
    return value is not None and value.strip().lower() in {"1", "true", "yes", "on"}


def _normalize_backend_early(value: str) -> str:
    aliases = {
        "zero": "zerogpu",
        "zero-gpu": "zerogpu",
        "zero_gpu": "zerogpu",
        "zgpu": "zerogpu",
    }
    value = value.strip().lower()
    return aliases.get(value, value)


def _wants_zerogpu_early() -> bool:
    """Decide, from environment variables only, whether to import `spaces` early.

    An explicit ``DA3_GPU_BACKEND`` wins: ``zerogpu`` (or one of its aliases)
    returns True, ``standard`` and ``cpu`` return False. For any other value,
    including the default ``auto``, the result is True when at least one of the
    ZeroGPU marker variables listed below is set to a truthy flag.
    """
    override = _normalize_backend_early(os.environ.get("DA3_GPU_BACKEND", "auto"))
    if override == "zerogpu":
        return True
    if override == "standard" or override == "cpu":
        return False

    marker_names = (
        "SPACES_ZERO_GPU",
        "ZEROGPU_V2",
        "SPACE_ZERO_GPU",
        "ZERO_GPU",
        "ZEROGPU",
        "HF_SPACE_ZERO_GPU",
        "SPACES_ZEROGPU",
        "SPACE_ZEROGPU",
    )
    for marker in marker_names:
        if _truthy_env_early(marker):
            return True
    return False


# ZeroGPU requires `spaces` to be imported before CUDA-related packages.
# Keep this block before any local imports that may import torch/gsplat.
#
# `_SPACES_MODULE` holds the imported `spaces` module once the import statement
# succeeds; `_SPACES_IMPORT_ERROR` holds the exception raised anywhere inside the
# `try` body (the import itself or the probe registration). Both remain None
# when `_wants_zerogpu_early()` returns False. `_install_gpu_wrapper()` reads
# these two globals later when it sets up the ZeroGPU backend.
_SPACES_MODULE = None
_SPACES_IMPORT_ERROR = None
if _wants_zerogpu_early():
    try:
        import spaces as _SPACES_MODULE  # noqa: E402

        @_SPACES_MODULE.GPU(duration=1)
        def _da3_zerogpu_startup_probe() -> str:
            """No-op function so ZeroGPU sees a decorated function at startup."""
            return "ok"

        print("[DA3] ZeroGPU startup probe registered (@spaces.GPU duration=1)")
    except Exception as exc:  # pragma: no cover - only occurs in hosted runtime edge cases
        # Record the failure instead of re-raising so the rest of the module
        # still loads. Note that if the import succeeded but the decorator
        # raised, `_SPACES_MODULE` is set AND `_SPACES_IMPORT_ERROR` is set.
        _SPACES_IMPORT_ERROR = exc
        print(f"[DA3] Early ZeroGPU `spaces` import failed: {exc!r}")


# Project imports are deliberately placed after the early ZeroGPU `spaces`
# import block, not at the top of the file.
from depth_anything_3.app.runtime_assets import maybe_sync_examples_on_startup
from depth_anything_3.app.runtime_config import configure_torch_runtime, detect_runtime_backend_decision

# Called before the app modules below are imported, which is why those imports
# carry E402 suppressions.
# NOTE(review): presumably this must take effect before those modules load
# torch — confirm against runtime_config.
configure_torch_runtime()

from depth_anything_3.app.gradio_app import DepthAnything3App  # noqa: E402
from depth_anything_3.app.modules.model_inference import ModelInference  # noqa: E402

# Model location used by main() when DA3_MODEL_DIR is not provided.
DEFAULT_MODEL_REPO = "depth-anything/DA3NESTED-GIANT-LARGE-1.1"


def _env_int(name: str, default: int) -> int:
    """Read integer environment variable *name*, falling back to *default*.

    An unset or blank value yields *default*. A value that is not a valid
    integer is reported on stdout and also yields *default*, so a typo in a
    Space variable cannot abort startup.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw.strip())
    except ValueError:
        print(f"[DA3] Ignoring invalid {name}={raw!r}; using {default}")
        return default


def _install_gpu_wrapper() -> str:
    """Install the correct inference wrapper for the selected hardware backend.

    The backend comes from ``detect_runtime_backend_decision()``; the decision,
    its reason and its marker are exported as ``DA3_RUNTIME_BACKEND_*``
    environment variables.

    - ``zerogpu``: ``ModelInference.run_inference`` is replaced by a wrapper
      decorated with ``@spaces.GPU`` whose duration is estimated per call. If
      the ``spaces`` module is unavailable, setup falls through to the standard
      path below.
    - ``cpu``: ``CUDA_VISIBLE_DEVICES`` is set to the empty string.
    - anything else: inference runs directly; the CUDA device name is looked
      up on a best-effort basis.

    Every path sets ``DA3_RUNTIME_BACKEND_EFFECTIVE``,
    ``DA3_RUNTIME_DEVICE_NAME`` and ``DA3_RUNTIME_GPU_MODE``.

    Returns:
        The effective backend: ``"zerogpu"``, ``"cpu"`` or ``"standard"``.
    """
    decision = detect_runtime_backend_decision()
    backend = decision.backend
    print(f"[DA3] Backend decision: {backend} ({decision.reason}; marker={decision.marker or 'none'})")
    os.environ["DA3_RUNTIME_BACKEND_DECISION"] = backend
    os.environ["DA3_RUNTIME_BACKEND_REASON"] = decision.reason
    os.environ["DA3_RUNTIME_BACKEND_MARKER"] = decision.marker or "none"

    # Malformed overrides fall back to the defaults instead of raising.
    default_duration = _env_int("DA3_ZEROGPU_DURATION", 120)
    min_duration = _env_int("DA3_ZEROGPU_DURATION_MIN", 30)
    max_duration = _env_int("DA3_ZEROGPU_DURATION_MAX", 120)
    size = os.environ.get("DA3_ZEROGPU_SIZE", "large")

    def _clamp_duration(value: int) -> int:
        # If min_duration > max_duration, this always yields min_duration.
        return max(min_duration, min(max_duration, int(value)))

    def _estimate_duration_from_args(*args, **kwargs) -> int:
        # `@spaces.GPU(duration=callable)` passes the decorated function inputs here.
        # Keep this estimator side-effect-free and independent of CUDA state.
        # NOTE(review): the positional indices below (4, 8, 10) must line up with
        # the signature of ModelInference.run_inference, and `args` presumably
        # starts with `self` because the wrapper is a method — confirm.
        requested = kwargs.get("zerogpu_duration_s", None)
        if requested is None and len(args) >= 11:
            requested = args[10]

        try:
            if requested is not None and str(requested).strip().lower() not in {"", "auto", "auto (recommended)"}:
                return _clamp_duration(int(float(requested)))
        except Exception:
            # Deliberately broad: an unparsable user value must never make the
            # estimator raise; fall through to the heuristic below.
            pass

        process_res_method = kwargs.get("process_res_method", None)
        infer_gs = bool(kwargs.get("infer_gs", False))
        if process_res_method is None and len(args) >= 5:
            process_res_method = args[4]
        if len(args) >= 9:
            infer_gs = bool(args[8])

        if infer_gs:
            return _clamp_duration(120)
        if str(process_res_method).lower() == "high_res":
            return _clamp_duration(120)
        return _clamp_duration(default_duration)

    if backend == "zerogpu":
        spaces_mod = _SPACES_MODULE
        if spaces_mod is None:
            if _SPACES_IMPORT_ERROR is not None:
                print(f"[DA3] ZeroGPU requested but early `spaces` import failed: {_SPACES_IMPORT_ERROR!r}")
            else:
                # The early import was skipped; try once more now.
                try:
                    import spaces as spaces_mod
                except Exception as exc:
                    spaces_mod = None
                    print(f"[DA3] ZeroGPU requested but `spaces` import failed: {exc!r}")

        if spaces_mod is not None:
            original_run_inference = ModelInference.run_inference

            @spaces_mod.GPU(duration=_estimate_duration_from_args, size=size)
            def gpu_run_inference(self, *args, **kwargs):
                return original_run_inference(self, *args, **kwargs)

            ModelInference.run_inference = gpu_run_inference
            os.environ["DA3_RUNTIME_BACKEND_EFFECTIVE"] = "zerogpu"
            os.environ["DA3_RUNTIME_DEVICE_NAME"] = "RTX PRO 6000 Blackwell (allocated dynamically)"
            os.environ["DA3_RUNTIME_GPU_MODE"] = f"@spaces.GPU dynamic duration, size={size}"
            os.environ["DA3_RUNTIME_ZEROGPU_DEFAULT_DURATION"] = str(default_duration)
            os.environ["DA3_RUNTIME_ZEROGPU_MIN_DURATION"] = str(min_duration)
            os.environ["DA3_RUNTIME_ZEROGPU_MAX_DURATION"] = str(max_duration)
            print(
                "[DA3] Runtime backend: ZeroGPU "
                f"(@spaces.GPU dynamic duration, size={size}, "
                f"default={default_duration}, min={min_duration}, max={max_duration})"
            )
            return "zerogpu"

        # Fall through to the standard setup below so the effective-backend
        # environment variables are populated on the fallback path as well.
        print("[DA3] Falling back to standard direct GPU/CPU inference.")

    if backend == "cpu":
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        os.environ["DA3_RUNTIME_BACKEND_EFFECTIVE"] = "cpu"
        os.environ["DA3_RUNTIME_DEVICE_NAME"] = "CPU"
        os.environ["DA3_RUNTIME_GPU_MODE"] = "No CUDA GPU detected"
        print("[DA3] Runtime backend: CPU")
        return "cpu"

    device_name = "CUDA GPU"
    try:
        import torch

        if torch.cuda.is_available():
            device_name = torch.cuda.get_device_name(0)
    except Exception:
        # Best effort: the device name is informational only, so keep the
        # generic placeholder when torch or CUDA cannot be queried.
        pass
    os.environ["DA3_RUNTIME_BACKEND_EFFECTIVE"] = "standard"
    os.environ["DA3_RUNTIME_DEVICE_NAME"] = device_name
    os.environ["DA3_RUNTIME_GPU_MODE"] = "Direct CUDA inference"
    print("[DA3] Runtime backend: standard direct GPU/CPU")
    return "standard"


def main() -> None:
    """Configure the runtime backend, build the app and launch it.

    Steps, in order:

    1. Install the inference wrapper via ``_install_gpu_wrapper()``.
    2. Resolve the model, workspace and gallery locations from
       ``DA3_MODEL_DIR``, ``DA3_WORKSPACE_DIR`` and ``DA3_GALLERY_DIR``
       (an unset or empty variable uses the default) and create the two
       directories.
    3. Call ``maybe_sync_examples_on_startup`` for the workspace.
    4. Build ``DepthAnything3App`` and, when ``DA3_CACHE_EXAMPLES`` is truthy,
       pre-cache the examples.
    5. Launch the app on ``0.0.0.0:7860`` without a share link.
    """
    runtime_backend = _install_gpu_wrapper()

    # `or` also covers a variable that is set but empty; an empty path would
    # otherwise make os.makedirs() raise.
    model_dir = os.environ.get("DA3_MODEL_DIR") or DEFAULT_MODEL_REPO
    workspace_dir = os.environ.get("DA3_WORKSPACE_DIR") or "workspace/gradio"
    gallery_dir = os.environ.get("DA3_GALLERY_DIR") or "workspace/gallery"

    os.makedirs(workspace_dir, exist_ok=True)
    os.makedirs(gallery_dir, exist_ok=True)

    synced, sync_message = maybe_sync_examples_on_startup(workspace_dir)
    print(f"[DA3] Example asset sync: {sync_message}")

    app = DepthAnything3App(
        model_dir=model_dir,
        workspace_dir=workspace_dir,
        gallery_dir=gallery_dir,
    )

    # Parse the flag with the same helper used for the other boolean
    # variables, so surrounding whitespace is tolerated consistently.
    cache_examples = _truthy_env_early("DA3_CACHE_EXAMPLES")
    cache_gs_tag = os.environ.get("DA3_CACHE_GS_TAG", "")

    print("🚀 Launching Depth Anything 3 on Hugging Face Spaces...")
    print(f"📦 Model Directory: {model_dir}")
    print(f"📁 Workspace Directory: {workspace_dir}")
    print(f"🖼️  Gallery Directory: {gallery_dir}")
    print(f"⚙️  Runtime Backend: {runtime_backend}")
    print(f"⬇️  Examples Synced This Startup: {synced}")
    print(f"💾 Cache Examples: {cache_examples}")

    if cache_examples:
        print("\n" + "=" * 60)
        print("Pre-caching mode enabled")
        print("This is disabled by default because it performs heavy inference at startup.")
        if cache_gs_tag:
            print(f"Scenes containing '{cache_gs_tag}' will use HIGH-RES + 3DGS")
        else:
            print("All scenes will use LOW-RES only")
        print("=" * 60)
        app.cache_examples(
            show_cam=True,
            filter_black_bg=False,
            filter_white_bg=False,
            save_percentage=5.0,
            num_max_points=1000,
            cache_gs_tag=cache_gs_tag,
            gs_trj_mode="smooth",
            gs_video_quality="low",
        )

    app.launch(host="0.0.0.0", port=7860, share=False)


# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()