"""Gradio demo for LEGATO optical music recognition (OMR).

The page lets a user upload a score image, run the model to obtain an ABC
transcription, edit that transcription while a rendered preview updates, and
export the result as a MusicXML file.
"""

import os

import spaces  # Must be before any CUDA/torch imports
import gradio as gr

import abc_utils
import config
from inference import inference

# File extensions (compared lower-cased) accepted as example score images.
_EXAMPLE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".webp")

_examples_dir = os.path.join(config.APP_DIR, "examples")

# gr.Examples expects one row per example; each row holds the single image path.
_example_paths = []
if os.path.isdir(_examples_dir):
    _example_paths = [
        [os.path.join(_examples_dir, name)]
        for name in sorted(os.listdir(_examples_dir))
        if name.lower().endswith(_EXAMPLE_EXTENSIONS)
    ]

# The Markdown bodies live at column 0 so their rendering does not depend on
# any indentation stripping performed by the UI layer.
_HEADER_MD = """
# 🎼 LEGATO: Large-scale End-to-end Generalizable Approach to Typeset OMR

[📄 Paper](https://arxiv.org/abs/2506.19065) · [🔗 GitHub](https://github.com/guang-yng/legato) · [📜 License](https://github.com/guang-yng/legato/blob/main/LICENSE.md)
"""

_ABOUT_MD = """
**Optical Music Recognition (OMR)** converts images of typeset sheet music into machine-readable notation.

**LEGATO** is a large-scale, end-to-end model for typeset OMR. It takes a score image 📸 and outputs **ABC notation** 🎵 directly, which can be converted to MusicXML for further use.

*From: ["LEGATO: Large-Scale End-to-End Generalizable Approach to Typeset OMR"](https://arxiv.org/abs/2506.19065)*
"""

_HOW_TO_MD = """
1. **Upload image** — Use the uploader below to provide a score image (typeset sheet music).
2. **Click "Run LEGATO"** — The model will transcribe the image into ABC notation.
3. **Fix the errors in the editor and check the realtime renderer** — Edit the ABC text in the transcription box; the rendered notation updates live. Correct any recognition mistakes and verify the result in the "Rendered ABC notation" panel.
4. **Download MusicXML** — Click "Generate MusicXML" to create the file; a download link will appear below for you to download it.

**ABC notation reference:** [ABC notation standard / grammar](https://abcnotation.com/wiki/abc:standard:v2.1)
"""

with gr.Blocks(theme=gr.themes.Soft(), title="LEGATO OMR Demo") as demo:
    gr.Markdown(_HEADER_MD)

    with gr.Accordion("ℹ️ About", open=True):
        gr.Markdown(_ABOUT_MD)

    with gr.Accordion("How to use", open=True):
        gr.Markdown(_HOW_TO_MD)

    gr.Markdown("### ✨ Try it")
    inp = gr.Image(type="pil", label="📤 Upload score image")

    # Only offer the examples gallery when at least one example image exists.
    if _example_paths:
        gr.Examples(
            examples=_example_paths,
            inputs=[inp],
            label="Example scores",
        )

    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; confirm the rendered-notation accordion belongs inside this Row
    # (editor and preview side by side).
    with gr.Row():
        out = gr.Textbox(label="📝 ABC transcription", lines=10, buttons=["copy"])
        with gr.Accordion("🎵 Rendered ABC notation", open=True):
            html_viz = gr.HTML(label=None, value=abc_utils.abc_viz_html(""))

    # NOTE(review): both buttons are assumed to share this Row, with the
    # download widget placed below it; confirm against the intended layout.
    with gr.Row():
        btn = gr.Button("▶️ Run LEGATO")
        btn_musicxml = gr.Button("📄 Generate MusicXML", variant="secondary")
    musicxml_file = gr.File(label="⬇️ Download MusicXML", interactive=False)

    # Transcribe the uploaded image into the ABC text box.
    btn.click(inference, inp, [out])

    # Re-render the preview on every change of the ABC text; a missing value
    # is passed to the renderer as an empty string.
    out.change(
        lambda x: abc_utils.abc_viz_html(x or ""),
        inputs=[out],
        outputs=[html_viz],
    )

    # Convert the current ABC text into a downloadable MusicXML file.
    btn_musicxml.click(
        abc_utils.abc_to_musicxml_file,
        inputs=[out],
        outputs=[musicxml_file],
    )

    gr.Markdown("---")
    gr.Textbox(
        value=config.BIBTEX,
        label="Citation (BibTeX)",
        lines=8,
        interactive=False,
        buttons=["copy"],
    )

demo.launch()