"""Hugging Face Space entrypoint for Small Cuts. Local dev keeps the lazy/mock defaults; on a Space this module wires the real backends eagerly so visitors never pay the model-load cost per click: - ``import spaces`` happens before anything touches torch (ZeroGPU hijack). - The narrator loads at module scope (ZeroGPU packs the weights at startup). - The narration hot path runs under ``@spaces.GPU``. - TTS runs inside @spaces.GPU workers too (kokoro's torch use poisons worker forks if it ever runs in the main process). """ import os import sys import warnings from pathlib import Path from starlette.exceptions import StarletteDeprecationWarning ROOT = Path(__file__).resolve().parent SRC = ROOT / "src" if str(SRC) not in sys.path: sys.path.insert(0, str(SRC)) warnings.filterwarnings( "ignore", message=r".*HTTP_422_UNPROCESSABLE_ENTITY.*HTTP_422_UNPROCESSABLE_CONTENT.*", category=StarletteDeprecationWarning, ) ON_SPACE = bool(os.environ.get("SPACE_ID")) ENGINE_MODE = bool(os.environ.get("SMALL_CUTS_ENGINE_URL", "").strip()) from small_cuts.hf_relay import RELAY_BUCKET_ENV # noqa: E402 RELAY_MODE = bool(os.environ.get(RELAY_BUCKET_ENV, "").strip()) MODAL_UPLOAD_MODE = bool(os.environ.get("SMALL_CUTS_MODAL_API_URL", "").strip()) VIEWER_ONLY_MODE = ENGINE_MODE or RELAY_MODE or MODAL_UPLOAD_MODE NEEDS_LOCAL_INFERENCE = not VIEWER_ONLY_MODE try: import spaces # noqa: F401 (must precede torch imports for ZeroGPU) except ImportError: # local dev / CI: no ZeroGPU spaces = None if ON_SPACE and NEEDS_LOCAL_INFERENCE: os.environ.setdefault("SMALL_CUTS_BACKEND", "transformers") os.environ.setdefault("SMALL_CUTS_TTS_BACKEND", "kokoro") from small_cuts import narrator # noqa: E402 from small_cuts.observability import init_sentry # noqa: E402 from small_cuts.viewer import THEME, build_viewer_app # noqa: E402 init_sentry() # Eager load: download + pack weights at startup, not on the first click. 
# The @spaces.GPU mark lives on the viewer's go-live handler (through
# ui._gpu), because ZeroGPU's startup scan only finds GPU functions on what
# Gradio binds.
if NEEDS_LOCAL_INFERENCE:
    _backend = narrator.get_backend()
    # Warm up only when the ``spaces`` package imported and the selected
    # backend is the transformers one; the backend name is not read otherwise.
    _warm_at_startup = spaces is not None and _backend.name == "transformers"
    if _warm_at_startup:
        _backend._load()

# In engine mode the Space is only a public reader for a private home-node
# engine, so it must not warm local model weights or expose upload narration
# controls. TTS is deliberately not pre-warmed in the main process: kokoro's
# torch use must stay inside @spaces.GPU workers.
demo = build_viewer_app()

if __name__ == "__main__":
    demo.launch(theme=THEME)