small-cuts-live / app.py
macayaven's picture
Deploy hybrid Modal upload
1995c34 verified
Raw
History Blame Contribute Delete
2.6 kB
"""Hugging Face Space entrypoint for Small Cuts.
Local dev keeps the lazy/mock defaults; on a Space this module wires the real
backends eagerly so visitors never pay the model-load cost per click:
- ``import spaces`` happens before anything touches torch (ZeroGPU hijack).
- The narrator loads at module scope (ZeroGPU packs the weights at startup).
- The narration hot path runs under ``@spaces.GPU``.
- TTS runs inside ``@spaces.GPU`` workers too (kokoro's torch use poisons
  worker forks if it ever runs in the main process).
"""
import os
import sys
import warnings
from pathlib import Path
from starlette.exceptions import StarletteDeprecationWarning
# Directory that contains this entrypoint; every other path hangs off it.
ROOT = Path(__file__).resolve().parents[0]
# Bundled source tree (presumably where the ``small_cuts`` package lives).
SRC = ROOT.joinpath("src")
# Put the source tree at the front of the import path, but only once, so a
# repeated import of this module does not stack duplicate entries.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))
# Silence exactly one Starlette deprecation notice: the one whose text mentions
# both HTTP_422_UNPROCESSABLE_ENTITY and HTTP_422_UNPROCESSABLE_CONTENT.
# Matching on category *and* message keeps every other warning visible.
warnings.filterwarnings(
    action="ignore",
    category=StarletteDeprecationWarning,
    message=r".*HTTP_422_UNPROCESSABLE_ENTITY.*HTTP_422_UNPROCESSABLE_CONTENT.*",
)
# Deployment flags read from the environment. Each value is stripped first so
# that a blank or whitespace-only variable counts as "unset" for every flag
# alike (ON_SPACE previously skipped the strip, unlike the other mode flags).
# True when SPACE_ID is set (expected only on a Hugging Face Space).
ON_SPACE = bool(os.environ.get("SPACE_ID", "").strip())
# True when an engine URL is configured via SMALL_CUTS_ENGINE_URL.
ENGINE_MODE = bool(os.environ.get("SMALL_CUTS_ENGINE_URL", "").strip())
from small_cuts.hf_relay import RELAY_BUCKET_ENV # noqa: E402
# Relay mode: the environment variable named by RELAY_BUCKET_ENV is non-blank.
RELAY_MODE = os.environ.get(RELAY_BUCKET_ENV, "").strip() != ""
# Modal upload mode: SMALL_CUTS_MODAL_API_URL is non-blank.
MODAL_UPLOAD_MODE = os.environ.get("SMALL_CUTS_MODAL_API_URL", "").strip() != ""
# Any single remote mode (engine, relay, or Modal upload) makes this process
# viewer-only ...
VIEWER_ONLY_MODE = any((ENGINE_MODE, RELAY_MODE, MODAL_UPLOAD_MODE))
# ... and local inference is needed exactly when no remote mode is active.
NEEDS_LOCAL_INFERENCE = not VIEWER_ONLY_MODE
# Default for local dev / CI, where the ZeroGPU ``spaces`` package is absent.
spaces = None
try:
    # Must run before anything imports torch so ZeroGPU can hook it.
    import spaces  # noqa: F401
except ImportError:
    # No ZeroGPU available: keep the ``None`` placeholder set above.
    pass
# On a Space that runs its own inference, select the real backends by default.
# ``setdefault`` leaves any value that is already present in the environment
# untouched, so an explicit override still wins.
if NEEDS_LOCAL_INFERENCE and ON_SPACE:
    # Narrator backend.
    os.environ.setdefault("SMALL_CUTS_BACKEND", "transformers")
    # Text-to-speech backend.
    os.environ.setdefault("SMALL_CUTS_TTS_BACKEND", "kokoro")
from small_cuts import narrator # noqa: E402
from small_cuts.observability import init_sentry # noqa: E402
from small_cuts.viewer import THEME, build_viewer_app # noqa: E402
# Initialise Sentry first (presumably so failures in the startup work below
# are reported -- confirm against small_cuts.observability).
init_sentry()

# Eager load: fetch and pack the model weights once at startup rather than on
# the first visitor click. The @spaces.GPU mark itself lives on the viewer's
# go-live handler (via ui._gpu), because ZeroGPU's startup scan only finds GPU
# functions on what Gradio binds.
if NEEDS_LOCAL_INFERENCE:
    _backend = narrator.get_backend()
    # Only pre-load when the ZeroGPU package imported successfully ...
    if spaces is not None:
        # ... and the selected narrator backend is the transformers one.
        if _backend.name == "transformers":
            _backend._load()
# In engine mode the Space only acts as a public reader for a private
# home-node engine, so it must neither warm local model weights nor expose
# upload narration controls. Relay and Modal-upload modes clear
# NEEDS_LOCAL_INFERENCE in the same way, so they skip the local warm-up too.
# There is deliberately no main-process TTS pre-warm: kokoro's torch use has
# to stay inside @spaces.GPU workers.
demo = build_viewer_app()

# Launch the app only when this file is executed directly as a script.
if __name__ == "__main__":
    demo.launch(theme=THEME)