"""Gradio demo app for LEGATO optical music recognition.

Builds a Blocks UI that lets a user upload a score image or PDF, run the
LEGATO model to obtain ABC notation, edit the transcription with a live
rendering, and export the result as a MusicXML file.
"""

import os

import spaces  # Must be before any CUDA/torch imports
import gradio as gr

import abc_utils
import config
from file_utils import file_to_pil_image
from inference import inference

# Image extensions accepted as bundled example scores.
_EXAMPLE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".webp")

_examples_dir = os.path.join(config.APP_DIR, "examples")

# gr.Examples expects one row per example; each row holds the value for the
# single input component, hence the one-element inner lists.
_example_paths = []
if os.path.isdir(_examples_dir):
    _example_paths = [
        [os.path.join(_examples_dir, name)]
        for name in sorted(os.listdir(_examples_dir))
        if name.lower().endswith(_EXAMPLE_EXTENSIONS)
    ]


with gr.Blocks(theme=gr.themes.Soft(), title="LEGATO OMR Demo") as demo:
    # --- Header and static help text -------------------------------------
    gr.Markdown("""
    # 🎼 LEGATO: Large-scale End-to-end Generalizable Approach to Typeset OMR

    [📄 Paper](https://arxiv.org/abs/2506.19065) · [🔗 GitHub](https://github.com/guang-yng/legato) · [📜 License](https://github.com/guang-yng/legato/blob/main/LICENSE.md)
    """)

    with gr.Accordion("ℹ️ About", open=True):
        gr.Markdown("""
        **Optical Music Recognition (OMR)** converts images of typeset sheet music into machine-readable notation.

        **LEGATO** is a large-scale, end-to-end model for typeset OMR. It takes a score image 📸 and outputs **ABC notation** 🎵 directly, which can be converted to MusicXML for further use.

        *From: ["LEGATO: Large-Scale End-to-End Generalizable Approach to Typeset OMR"](https://arxiv.org/abs/2506.19065)*
        """)

    with gr.Accordion("How to use", open=True):
        gr.Markdown("""
        1. **Upload image or PDF** — Use the uploader below to provide a score image or PDF (typeset sheet music). PDFs are limited to 3 pages and are concatenated into one image.
        2. **Click "Run LEGATO"** — The model will transcribe the image into ABC notation.
        3. **Fix the errors in the editor and check the realtime renderer** — Edit the ABC text in the transcription box; the rendered notation updates live. Correct any recognition mistakes and verify the result in the "Rendered ABC notation" panel. You can fix mistakes **before** generating MusicXML, or **after** downloading (e.g. in notation software that imports MusicXML).
        4. **Download MusicXML** — Click "Generate MusicXML" to create the file; a download link will appear below for you to download it.

        **ABC notation reference:** [ABC notation standard / grammar](https://abcnotation.com/wiki/abc:standard:v2.1)
        """)

    gr.Markdown("""
    ### ✨ Try it

    **Model disclaimer:** This is a demo of the LEGATO model. LEGATO works best with **typeset, high-quality** scores. It is not a perfect model and may not be able to transcribe all scores correctly. Please use with caution.

    **Privacy:** Our team does not store the files uploaded to the demo, nor do we use any of the uploaded data for further model training. Files are processed temporarily on the server to generate the MusicXML output and are then discarded. The demo is hosted on Hugging Face, so processing runs on their infrastructure; we have not configured the tool to save or harvest user inputs.
    """)

    # --- Input: upload, optional examples, preview ------------------------
    inp = gr.File(
        label="📤 Upload score image or PDF",
        file_types=[".png", ".jpg", ".jpeg", ".webp", ".pdf"],
    )

    # Only show the examples widget when example files were actually found.
    if _example_paths:
        gr.Examples(
            examples=_example_paths,
            inputs=[inp],
            label="Example scores",
        )

    preview = gr.Image(
        label="Preview",
        type="pil",
        interactive=False,
        show_label=True,
    )
    # Refresh the preview whenever the uploaded file changes.
    inp.change(file_to_pil_image, inputs=[inp], outputs=[preview])

    btn = gr.Button("▶️ Run LEGATO — Transcribe image to ABC", variant="primary")

    # --- Output: editable ABC text next to its rendering ------------------
    with gr.Row():
        out = gr.Textbox(label="📝 ABC transcription", lines=10, buttons=["copy"])
        with gr.Accordion("🎵 Rendered ABC notation", open=True):
            html_viz = gr.HTML(label=None, value=abc_utils.abc_viz_html(""))

    btn_musicxml = gr.Button("📄 Generate MusicXML", variant="secondary")
    musicxml_file = gr.File(label="⬇️ Download MusicXML", interactive=False)

    def run_legato(file_input):
        """Transcribe the uploaded score file with the LEGATO model.

        Args:
            file_input: Value of the upload component, passed straight to
                ``file_to_pil_image``.

        Returns:
            The result of ``inference`` on the converted image, or an empty
            string when ``file_to_pil_image`` returns ``None``.
        """
        image = file_to_pil_image(file_input)
        if image is None:
            return ""
        return inference(image)

    # --- Event wiring -----------------------------------------------------
    btn.click(run_legato, inp, [out])
    # Re-render on every text change; `or ""` guards against a None value.
    out.change(
        lambda x: abc_utils.abc_viz_html(x or ""),
        inputs=[out],
        outputs=[html_viz],
    )
    btn_musicxml.click(
        abc_utils.abc_to_musicxml_file,
        inputs=[out],
        outputs=[musicxml_file],
    )

    # --- Footer: citation -------------------------------------------------
    gr.Markdown("---")
    gr.Textbox(
        value=config.BIBTEX,
        label="Citation (BibTeX)",
        lines=8,
        interactive=False,
        buttons=["copy"],
    )

demo.launch()