legato-demo / app.py
Guang Yang
doc: add privacy explanation and error correction guidelines
186844b
Raw
History Blame Contribute Delete
4.68 kB
import os
import spaces # Must be before any CUDA/torch imports
import gradio as gr
import abc_utils
import config
from file_utils import file_to_pil_image
from inference import inference
# Lower-case file suffixes that qualify a file as a bundled example score.
_EXAMPLE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".webp")
_examples_dir = os.path.join(config.APP_DIR, "examples")

# One single-element row per matching file, in sorted filename order.
# Stays an empty list when the examples directory does not exist.
_example_paths = (
    [
        [os.path.join(_examples_dir, entry)]
        for entry in sorted(os.listdir(_examples_dir))
        if entry.lower().endswith(_EXAMPLE_EXTENSIONS)
    ]
    if os.path.isdir(_examples_dir)
    else []
)
# Build the demo UI. All components and event listeners are created inside the
# Blocks context so that they are attached to `demo`.
# NOTE(review): the nesting of the layout containers (what sits inside the Row
# and the Accordions) was reconstructed from statement order - confirm it
# against the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="LEGATO OMR Demo") as demo:
    # Page header: title plus paper / code / license links.
    gr.Markdown("""
# 🎼 LEGATO: Large-scale End-to-end Generalizable Approach to Typeset OMR
[📄 Paper](https://arxiv.org/abs/2506.19065) · [🔗 GitHub](https://github.com/guang-yng/legato) · [📜 License](https://github.com/guang-yng/legato/blob/main/LICENSE.md)
""")

    with gr.Accordion("ℹ️ About", open=True):
        gr.Markdown("""
**Optical Music Recognition (OMR)** converts images of typeset sheet music into machine-readable notation.
**LEGATO** is a large-scale, end-to-end model for typeset OMR. It takes a score image 📸 and outputs **ABC notation** 🎵 directly, which can be converted to MusicXML for further use.
*From: ["LEGATO: Large-Scale End-to-End Generalizable Approach to Typeset OMR"](https://arxiv.org/abs/2506.19065)*
""")

    with gr.Accordion("How to use", open=True):
        gr.Markdown("""
1. **Upload image or PDF** — Use the uploader below to provide a score image or PDF (typeset sheet music). PDFs are limited to 3 pages and are concatenated into one image.
2. **Click "Run LEGATO"** — The model will transcribe the image into ABC notation.
3. **Fix the errors in the editor and check the realtime renderer** — Edit the ABC text in the transcription box; the rendered notation updates live. Correct any recognition mistakes and verify the result in the "Rendered ABC notation" panel. You can fix mistakes **before** generating MusicXML, or **after** downloading (e.g. in notation software that imports MusicXML).
4. **Download MusicXML** — Click "Generate MusicXML" to create the file; a download link will appear below for you to download it.
**ABC notation reference:** [ABC notation standard / grammar](https://abcnotation.com/wiki/abc:standard:v2.1)
""")

    gr.Markdown("""
### ✨ Try it
**Model disclaimer:** This is a demo of the LEGATO model. LEGATO works best with **typeset, high-quality** scores. It is not a perfect model and may not be able to transcribe all scores correctly. Please use with caution.
**Privacy:** Our team does not store the files uploaded to the demo, nor do we use any of the uploaded data for further model training. Files are processed temporarily on the server to generate the MusicXML output and are then discarded. The demo is hosted on Hugging Face, so processing runs on their infrastructure; we have not configured the tool to save or harvest user inputs.
""")

    # Input: a single uploaded score, either an image or a PDF.
    inp = gr.File(
        label="📤 Upload score image or PDF",
        file_types=[".png", ".jpg", ".jpeg", ".webp", ".pdf"],
    )

    # Offer the bundled examples only when some were found on disk.
    if _example_paths:
        gr.Examples(
            examples=_example_paths,
            inputs=[inp],
            label="Example scores",
        )

    # Read-only preview of the upload, refreshed whenever the file changes.
    preview = gr.Image(
        label="Preview",
        type="pil",
        interactive=False,
        show_label=True,
    )
    inp.change(file_to_pil_image, inputs=[inp], outputs=[preview])

    btn = gr.Button("▶️ Run LEGATO — Transcribe image to ABC", variant="primary")

    with gr.Row():
        out = gr.Textbox(label="📝 ABC transcription", lines=10, buttons=["copy"])
        with gr.Accordion("🎵 Rendered ABC notation", open=True):
            # Start with the rendering of an empty transcription.
            html_viz = gr.HTML(label=None, value=abc_utils.abc_viz_html(""))

    btn_musicxml = gr.Button("📄 Generate MusicXML", variant="secondary")
    musicxml_file = gr.File(label="⬇️ Download MusicXML", interactive=False)

    def run_legato(file_input):
        """Transcribe the uploaded score and return the text for the ABC box.

        ``file_input`` is the current value of the upload component. It is
        converted with ``file_to_pil_image``; when that yields ``None`` an
        empty string is returned so the transcription box is left blank.
        Otherwise the image is handed to ``inference`` and its result is
        returned unchanged.
        """
        image = file_to_pil_image(file_input)
        if image is None:
            return ""
        return inference(image)

    btn.click(run_legato, inputs=[inp], outputs=[out])

    # Re-render the notation on every change of the transcription text;
    # `x or ""` maps a missing value to an empty string before rendering.
    out.change(
        lambda x: abc_utils.abc_viz_html(x or ""),
        inputs=[out],
        outputs=[html_viz],
    )

    # Convert the current (possibly hand-corrected) ABC text into the file
    # component used for download.
    btn_musicxml.click(
        abc_utils.abc_to_musicxml_file,
        inputs=[out],
        outputs=[musicxml_file],
    )

    gr.Markdown("---")
    gr.Textbox(
        value=config.BIBTEX,
        label="Citation (BibTeX)",
        lines=8,
        interactive=False,
        buttons=["copy"],
    )
# Start serving the `demo` Blocks app. There is no `__main__` guard, so this
# runs whenever the module is executed or imported.
demo.launch()