Spaces:

build-small-hackathon
/

small-cuts-live

Paused

App Files Files Community

small-cuts-live / app.py

macayaven

Deploy hybrid Modal upload

1995c34 verified 4 days ago

Raw

History Blame Contribute Delete

2.6 kB

	"""Hugging Face Space entrypoint for Small Cuts.

	Local dev keeps the lazy/mock defaults; on a Space this module wires the real
	backends eagerly so visitors never pay the model-load cost per click:

	- ``import spaces`` happens before anything touches torch (ZeroGPU hijack).
	- The narrator loads at module scope (ZeroGPU packs the weights at startup).
	- The narration hot path runs under ``@spaces.GPU``.
	- TTS runs inside @spaces.GPU workers too (kokoro's torch use poisons
	worker forks if it ever runs in the main process).
	"""

	import os
	import sys
	import warnings
	from pathlib import Path

	from starlette.exceptions import StarletteDeprecationWarning

	# Put the sibling ``src/`` directory on the import path so the ``small_cuts``
	# imports further down can resolve (presumably the package lives there).
	ROOT = Path(__file__).resolve().parent
	SRC = ROOT / "src"
	if str(SRC) not in sys.path:
	    # Index 0 gives these sources precedence over every other sys.path entry.
	    sys.path.insert(0, str(SRC))

	# Silence Starlette's deprecation notice about the renamed 422 status constant.
	# ``message`` is a regex matched against the *start* of the warning text, so the
	# pattern needs ``.*`` (not a single ``.``) to skip whatever text precedes and
	# separates the old and the new constant name.
	warnings.filterwarnings(
	    "ignore",
	    message=r".*HTTP_422_UNPROCESSABLE_ENTITY.*HTTP_422_UNPROCESSABLE_CONTENT.*",
	    category=StarletteDeprecationWarning,
	)

	# Deployment-mode switches, all read from the environment once at import time.
	# An unset, empty, or whitespace-only URL/bucket variable leaves its mode off.
	ON_SPACE = bool(os.getenv("SPACE_ID"))
	ENGINE_MODE = os.getenv("SMALL_CUTS_ENGINE_URL", "").strip() != ""

	from small_cuts.hf_relay import RELAY_BUCKET_ENV  # noqa: E402

	RELAY_MODE = os.getenv(RELAY_BUCKET_ENV, "").strip() != ""
	MODAL_UPLOAD_MODE = os.getenv("SMALL_CUTS_MODAL_API_URL", "").strip() != ""

	# Any one of the three remote modes makes this process viewer-only; local
	# inference is needed exactly when none of them is configured.
	VIEWER_ONLY_MODE = any((ENGINE_MODE, RELAY_MODE, MODAL_UPLOAD_MODE))
	NEEDS_LOCAL_INFERENCE = not VIEWER_ONLY_MODE

	# Optional ZeroGPU dependency. The import sits here, ahead of the ``small_cuts``
	# imports below, because it must run before anything imports torch. When the
	# package is not installed, ``spaces`` is bound to None so later code can test
	# for it with ``spaces is not None``.
	try:
	    import spaces  # noqa: F401 (must precede torch imports for ZeroGPU)
	except ImportError:  # local dev / CI: no ZeroGPU
	    spaces = None

	# On a Space that runs inference itself, default to the real narration and TTS
	# backends. ``setdefault`` leaves any value already present in the environment
	# untouched, so an explicit override still wins.
	if ON_SPACE and NEEDS_LOCAL_INFERENCE:
	    os.environ.setdefault("SMALL_CUTS_BACKEND", "transformers")
	    os.environ.setdefault("SMALL_CUTS_TTS_BACKEND", "kokoro")

	from small_cuts import narrator # noqa: E402
	from small_cuts.observability import init_sentry # noqa: E402
	from small_cuts.viewer import THEME, build_viewer_app # noqa: E402

	# NOTE(review): presumably configures Sentry error reporting; it is called before
	# the backend warm-up and app construction that follow — confirm the details in
	# small_cuts.observability.
	init_sentry()

	# Eager load: download + pack weights at startup, not on the first click.
	# The @spaces.GPU mark lives on the viewer's go-live handler (via ui._gpu;
	# ZeroGPU's startup scan only finds GPU functions on what Gradio binds).
	if NEEDS_LOCAL_INFERENCE:
	    _backend = narrator.get_backend()
	    # Warm up only when the ``spaces`` package imported successfully and the
	    # selected backend is the transformers one; any other combination is left
	    # to load lazily.
	    if spaces is not None and _backend.name == "transformers":
	        _backend._load()

	# In engine mode the Space is only a public reader for a private home-node engine, so it must not
	# warm local model weights or expose upload narration controls. The relay and Modal-upload modes
	# likewise skip the local warm-up (all three clear NEEDS_LOCAL_INFERENCE). No main-process TTS
	# pre-warm: kokoro's torch use must stay inside @spaces.GPU workers.
	demo = build_viewer_app()

	# Launch only when run as a script; importing this module just exposes ``demo``.
	if __name__ == "__main__":
	    demo.launch(theme=THEME)