Spaces:

guangyangmusic
/

legato-demo

Running on Zero

App Files Files Community

guangyangmusic committed on Feb 1

Commit

aa151d2

1 Parent(s): cd17632

chore: reorganize codebase

Browse files

Files changed (5) hide show

abc_utils.py +68 -0
app.py +10 -126
config.py +19 -0
image_utils.py +16 -0
inference.py +35 -0

abc_utils.py ADDED Viewed

	@@ -0,0 +1,68 @@

+"""ABC notation utilities: MusicXML conversion and HTML visualization."""
+import html as html_module
+import json
+import os
+import subprocess
+import tempfile
+from config import ABC2XML_PATH, APP_DIR
+def abc_to_musicxml_file(abc: str):
+    """Convert ABC notation to a MusicXML file by running abc2xml.py.
+
+    The stripped ABC text is piped to the converter script on stdin (the "-"
+    argument) and the converter's stdout is written to ``score.musicxml``
+    inside a fresh temporary directory.
+
+    Args:
+        abc: ABC notation text. ``None`` or blank text means there is nothing
+            to convert.
+
+    Returns:
+        Path of the written MusicXML file, or ``None`` when the input is
+        blank, the converter cannot be run, times out, exits non-zero or
+        prints nothing, or the file cannot be written.
+    """
+    import shutil
+    import sys
+
+    abc_text = (abc or "").strip()
+    if not abc_text:
+        return None
+    # The PYTHON environment variable still takes precedence, but the fallback
+    # is the interpreter running this app: a bare "python" may be missing from
+    # PATH or resolve to a different environment.
+    interpreter = os.environ.get("PYTHON") or sys.executable or "python"
+    try:
+        result = subprocess.run(
+            [interpreter, ABC2XML_PATH, "-"],
+            input=abc_text.encode("utf-8"),
+            capture_output=True,
+            cwd=APP_DIR,
+            timeout=30,
+        )
+    except (OSError, ValueError, subprocess.SubprocessError):
+        # OSError: interpreter could not be started; ValueError: text or
+        # arguments that cannot be encoded/passed; SubprocessError: timeout.
+        return None
+    if result.returncode != 0 or not result.stdout:
+        return None
+    # stdout is bytes (text mode is not requested); undecodable bytes are
+    # replaced rather than aborting the export.
+    xml_str = result.stdout.decode("utf-8", errors="replace")
+    tmpdir = None
+    try:
+        tmpdir = tempfile.mkdtemp(prefix="musicxml_")
+        score_path = os.path.join(tmpdir, "score.musicxml")
+        with open(score_path, "w", encoding="utf-8") as f:
+            f.write(xml_str)
+    except OSError:
+        # Do not leave a half-written export behind.
+        if tmpdir is not None:
+            shutil.rmtree(tmpdir, ignore_errors=True)
+        return None
+    return score_path
+def abc_viz_html(abc: str) -> str:
+    """Build an ``<iframe>`` snippet that renders ABC notation with ABCJS.
+
+    The ABC text is JSON-encoded and stored in the ``data-abc`` attribute of
+    the framed document; a script inside the frame parses it and calls
+    ``ABCJS.renderAbc``. ``None`` is treated as an empty score.
+    """
+    abc_payload = html_module.escape(json.dumps(abc or ""), quote=True)
+    # Gradio strips <script> in gr.HTML; use iframe srcdoc so ABCJS runs inside the frame.
+    document_parts = [
+        '<!DOCTYPE html><html><head><meta charset="utf-8">',
+        '<style>body{overflow:auto;margin:0;} #abc-viz{width:100%;}</style></head><body>',
+        '<div id="abc-viz" data-abc="',
+        abc_payload,
+        '"></div>',
+        '<script src="https://cdnjs.cloudflare.com/ajax/libs/abcjs/6.4.0/abcjs-basic-min.js"><\x2fscript>',
+        '<script>',
+        '(function(){ var el=document.getElementById("abc-viz"); if(!el) return; ',
+        'var run=function(){ try { var abc=JSON.parse(el.getAttribute("data-abc")); ',
+        'if(typeof ABCJS!=="undefined"&&abc) ABCJS.renderAbc("abc-viz",abc,{responsive:"resize"}); } catch(e){ el.innerHTML="<span>Invalid ABC</span>"; } }; ',
+        'if(typeof ABCJS!=="undefined") run(); else { var s=document.createElement("script"); ',
+        's.src="https://cdnjs.cloudflare.com/ajax/libs/abcjs/6.4.0/abcjs-basic-min.js"; s.onload=run; document.head.appendChild(s); } })();',
+        '<\x2fscript></body></html>',
+    ]
+    frame_document = "".join(document_parts)
+    # The whole document becomes an attribute value: escape "&" first so the
+    # entities produced for the double quotes are not escaped a second time.
+    srcdoc_value = frame_document.replace("&", "&amp;").replace('"', "&quot;")
+    iframe_open = (
+        '<iframe sandbox="allow-scripts" title="ABC notation" '
+        'style="width:100%;height:60vh;max-height:400px;display:block;" srcdoc="'
+    )
+    return iframe_open + srcdoc_value + '"></iframe>'

app.py CHANGED Viewed

@@ -1,125 +1,9 @@
-import spaces
 import gradio as gr
-from legato.models import *
-from transformers import AutoProcessor, GenerationConfig
-import torch
-import os
-import html as html_module
-import json
-import subprocess
-import tempfile
-from PIL import Image
-_APP_DIR = os.path.dirname(os.path.abspath(__file__))
-_ABC2XML = os.path.join(_APP_DIR, "abc2xml.py")
-BIBTEX = """@misc{yang2025legatolargescaleendtoendgeneralizable,
-    title={LEGATO: Large-scale End-to-end Generalizable Approach to Typeset OMR},
-    author={Guang Yang and Victoria Ebert and Nazif Tamer and Brian Siyuan Zheng and Luiza Pozzobon and Noah A. Smith},
-    year={2025},
-    eprint={2506.19065},
-    archivePrefix={arXiv},
-    primaryClass={cs.CV},
-    url={https://arxiv.org/abs/2506.19065},
-}"""
-# Portrait letter aspect: 8.5" × 11" → width/height
-LETTER_ASPECT = 8.5 / 11
-def _pad_to_portrait_letter(pil_image: Image.Image) -> Image.Image:
-    """If aspect ratio is narrower than letter, pad at the bottom to match letter aspect."""
-    w, h = pil_image.size
-    if w / h < LETTER_ASPECT:
-        return pil_image
-    new_h = int(round(w / LETTER_ASPECT))
-    canvas = Image.new("RGB", (w, new_h), (255, 255, 255))
-    if pil_image.mode != "RGB":
-        pil_image = pil_image.convert("RGB")
-    canvas.paste(pil_image, (0, 0))
-    return canvas
-hf_token = os.getenv("HF_TOKEN")
-model_id = "guangyangmusic/legato"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-processor = AutoProcessor.from_pretrained(model_id, token=hf_token)
-model = LegatoModel.from_pretrained(model_id, token=hf_token, trust_remote_code=True).to(device)
-if device == "cuda":
-    model = model.half()
-gen_config = GenerationConfig(max_length=2048, num_beams=10, repetition_penalty=1.1)
-def _abc_to_musicxml_file(abc: str):
-    """Convert ABC to MusicXML using abc2xml.py -; return file path for download or None."""
-    if not (abc or "").strip():
-        return None
-    try:
-        result = subprocess.run(
-            [os.environ.get("PYTHON", "python"), _ABC2XML, "-"],
-            input=(abc or "").strip().encode("utf-8"),
-            capture_output=True,
-            cwd=_APP_DIR,
-            timeout=30,
-        )
-        if result.returncode != 0 or not result.stdout:
-            return None
-        xml_bytes = result.stdout
-        if isinstance(xml_bytes, bytes):
-            xml_str = xml_bytes.decode("utf-8", errors="replace")
-        else:
-            xml_str = xml_bytes
-        tmpdir = tempfile.mkdtemp(prefix="musicxml_")
-        score_path = os.path.join(tmpdir, "score.musicxml")
-        try:
-            with open(score_path, "w", encoding="utf-8") as f:
-                f.write(xml_str)
-            return score_path
-        except Exception:
-            try:
-                os.unlink(score_path)
-            except Exception:
-                pass
-            try:
-                os.rmdir(tmpdir)
-            except Exception:
-                pass
-            return None
-    except Exception:
-        return None
-def _abc_viz_html(abc: str) -> str:
-    viz_abc = abc or ""
-    data_attr = html_module.escape(json.dumps(viz_abc), quote=True)
-    # Gradio strips <script> in gr.HTML; use iframe srcdoc so ABCJS runs inside the frame.
-    inner = (
-        '<!DOCTYPE html><html><head><meta charset="utf-8">'
-        '<style>body{overflow:auto;margin:0;} #abc-viz{width:100%;}</style></head><body>'
-        '<div id="abc-viz" data-abc="' + data_attr + '"></div>'
-        '<script src="https://cdnjs.cloudflare.com/ajax/libs/abcjs/6.4.0/abcjs-basic-min.js"><\x2fscript>'
-        '<script>'
-        '(function(){ var el=document.getElementById("abc-viz"); if(!el) return; '
-        'var run=function(){ try { var abc=JSON.parse(el.getAttribute("data-abc")); '
-        'if(typeof ABCJS!=="undefined"&&abc) ABCJS.renderAbc("abc-viz",abc,{responsive:"resize"}); } catch(e){ el.innerHTML="<span>Invalid ABC</span>"; } }; '
-        'if(typeof ABCJS!=="undefined") run(); else { var s=document.createElement("script"); '
-        's.src="https://cdnjs.cloudflare.com/ajax/libs/abcjs/6.4.0/abcjs-basic-min.js"; s.onload=run; document.head.appendChild(s); } })();'
-        '<\x2fscript></body></html>'
-    )
-    srcdoc_escaped = inner.replace("&", "&amp;").replace('"', "&quot;")
-    return '<iframe sandbox="allow-scripts" title="ABC notation" style="width:100%;height:60vh;max-height:400px;display:block;" srcdoc="' + srcdoc_escaped + '"></iframe>'
-@spaces.GPU
-def inference(image):
-    if not image: return ""
-    image = _pad_to_portrait_letter(image)
-    inputs = processor(images=[image], truncation=True, return_tensors='pt').to(device)
-    with torch.no_grad():
-        outputs = model.generate(**inputs, generation_config=gen_config, use_model_defaults=False)
-    return processor.batch_decode(outputs, skip_special_tokens=True)[0].replace("<|text|>", "text")
 with gr.Blocks(theme=gr.themes.Soft(), title="LEGATO OMR Demo") as demo:
     gr.Markdown("""
@@ -152,25 +36,25 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LEGATO OMR Demo") as demo:
     with gr.Row():
         out = gr.Textbox(label="📝 ABC transcription", lines=10, buttons=["copy"])
         with gr.Accordion("🎵 Rendered ABC notation", open=True):
-            html_viz = gr.HTML(label=None, value=_abc_viz_html(""))
     with gr.Row():
         btn = gr.Button("▶️ Run LEGATO")
         dl_musicxml = gr.DownloadButton("⬇️ Download MusicXML", variant="secondary")
     btn.click(inference, inp, [out])
-    out.change(lambda x: _abc_viz_html(x or ""), inputs=[out], outputs=[html_viz])
     dl_musicxml.click(
-        _abc_to_musicxml_file,
         inputs=[out],
         outputs=[dl_musicxml],
     )
     gr.Markdown("---")
     gr.Textbox(
-        value=BIBTEX,
         label="Citation (BibTeX)",
-        lines=9,
         interactive=False,
         buttons=["copy"],
     )
-demo.launch()

+import spaces  # Must be before any CUDA/torch imports
 import gradio as gr
+import abc_utils
+import config
+from inference import inference
 with gr.Blocks(theme=gr.themes.Soft(), title="LEGATO OMR Demo") as demo:
     gr.Markdown("""
     with gr.Row():
         out = gr.Textbox(label="📝 ABC transcription", lines=10, buttons=["copy"])
         with gr.Accordion("🎵 Rendered ABC notation", open=True):
+            html_viz = gr.HTML(label=None, value=abc_utils.abc_viz_html(""))
     with gr.Row():
         btn = gr.Button("▶️ Run LEGATO")
         dl_musicxml = gr.DownloadButton("⬇️ Download MusicXML", variant="secondary")
     btn.click(inference, inp, [out])
+    out.change(lambda x: abc_utils.abc_viz_html(x or ""), inputs=[out], outputs=[html_viz])
     dl_musicxml.click(
+        abc_utils.abc_to_musicxml_file,
         inputs=[out],
         outputs=[dl_musicxml],
     )
     gr.Markdown("---")
     gr.Textbox(
+        value=config.BIBTEX,
         label="Citation (BibTeX)",
+        lines=9,  # config.BIBTEX is nine lines long; keep the whole entry visible
         interactive=False,
         buttons=["copy"],
     )
+demo.launch()

config.py ADDED Viewed

	@@ -0,0 +1,19 @@

+"""Shared configuration and constants for the LEGATO OMR app."""
+import os
+import torch
+# Absolute path of the directory that contains this file.
+APP_DIR = os.path.dirname(os.path.abspath(__file__))
+# Location of the abc2xml.py converter script, expected next to this file.
+ABC2XML_PATH = os.path.join(APP_DIR, "abc2xml.py")
+# BibTeX entry for the LEGATO paper (nine lines of text).
+BIBTEX = """@misc{yang2025legatolargescaleendtoendgeneralizable,
+    title={LEGATO: Large-scale End-to-end Generalizable Approach to Typeset OMR},
+    author={Guang Yang and Victoria Ebert and Nazif Tamer and Brian Siyuan Zheng and Luiza Pozzobon and Noah A. Smith},
+    year={2025},
+    eprint={2506.19065},
+    archivePrefix={arXiv},
+    primaryClass={cs.CV},
+    url={https://arxiv.org/abs/2506.19065},
+}"""
+# Portrait letter aspect: 8.5" × 11" → width/height
+LETTER_ASPECT = 8.5 / 11

image_utils.py ADDED Viewed

	@@ -0,0 +1,16 @@

+"""Image preprocessing utilities for LEGATO OMR."""
+from PIL import Image
+from config import LETTER_ASPECT
+def pad_to_portrait_letter(pil_image: Image.Image) -> Image.Image:
+    """Pad an image that is wider than portrait letter with white at the bottom.
+
+    Images whose width/height ratio is below ``LETTER_ASPECT`` are returned
+    unchanged. Otherwise the image is pasted at the top-left corner of a new
+    white RGB canvas of the same width whose height gives the letter aspect.
+    """
+    width, height = pil_image.size
+    already_narrow = width / height < LETTER_ASPECT
+    if already_narrow:
+        return pil_image
+    # Height a page of this width has at the letter aspect ratio.
+    padded_height = int(round(width / LETTER_ASPECT))
+    page = Image.new("RGB", (width, padded_height), (255, 255, 255))
+    source = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")
+    # Anchoring at (0, 0) leaves all of the added white space below the image.
+    page.paste(source, (0, 0))
+    return page

inference.py ADDED Viewed

	@@ -0,0 +1,35 @@

+"""Model loading and inference for LEGATO OMR."""
+import os
+import spaces
+import torch
+from legato.models import LegatoModel
+from transformers import AutoProcessor, GenerationConfig
+from image_utils import pad_to_portrait_letter
+# Hugging Face access token from the environment; None when HF_TOKEN is unset.
+hf_token = os.getenv("HF_TOKEN")
+MODEL_ID = "guangyangmusic/legato"
+# Device is chosen once, at import time.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Processor and model are loaded when this module is imported.
+processor = AutoProcessor.from_pretrained(MODEL_ID, token=hf_token)
+model = LegatoModel.from_pretrained(MODEL_ID, token=hf_token, trust_remote_code=True).to(device)
+# Half precision is used only when running on CUDA.
+if device == "cuda":
+    model = model.half()
+# Generation settings: 10 beams, outputs capped at length 2048, repetition penalty 1.1.
+gen_config = GenerationConfig(max_length=2048, num_beams=10, repetition_penalty=1.1)
+@spaces.GPU
+def inference(image):
+    """Transcribe a sheet-music image with the LEGATO model.
+
+    A falsy ``image`` yields an empty string. Otherwise the image is padded
+    with ``pad_to_portrait_letter``, run through the processor and the model's
+    ``generate``, and the first decoded sequence is returned with the
+    "<|text|>" marker replaced by "text".
+    """
+    if not image:
+        return ""
+    padded = pad_to_portrait_letter(image)
+    batch = processor(images=[padded], truncation=True, return_tensors="pt")
+    batch = batch.to(device)
+    # Gradients are not needed for generation.
+    with torch.no_grad():
+        generated = model.generate(
+            **batch, generation_config=gen_config, use_model_defaults=False
+        )
+    decoded = processor.batch_decode(generated, skip_special_tokens=True)
+    transcription = decoded[0]
+    return transcription.replace("<|text|>", "text")