audio-flamingo-next-etheroi

Paused

SreyanG-NVIDIA committed on Apr 11

Commit

f74ac89

1 Parent(s): ad3e6b2

Update demo

Files changed (2) hide show

app.py CHANGED Viewed

@@ -128,6 +128,20 @@ HERO_BADGES = f"""
   </div>
 </div>
 """
 APP_CSS = """
 :root {
   --font-sans: ui-sans-serif, system-ui, sans-serif,
@@ -534,6 +548,7 @@ with gr.Blocks(css=APP_CSS, theme=gr.themes.Soft(primary_hue="teal", secondary_h
         </div>
         """
     )
     with gr.Tabs(elem_classes="tab-nav"):
         with gr.Row(elem_classes="panel-row"):

   </div>
 </div>
 """
+MODEL_GUIDE = """
+### This Model Is Best For
+- Standard audio QA and instruction following across speech, sound, and music
+- Assistant-style long-audio understanding with direct answers and follow-up chat
+- Speech tasks such as ASR, paralinguistic understanding, and multilingual AST / speech translation
+- Broad music captioning and audio description when you want an answer rather than a dense caption
+
+If you need the most detailed long-form captions or timestamp-heavy scene breakdowns, use
+[Audio Flamingo Next Captioner](https://huggingface.co/spaces/nvidia/audio-flamingo-next-captioner).
+
+If you need explicit step-by-step timestamp-grounded reasoning traces, use
+[Audio Flamingo Next Think](https://huggingface.co/spaces/nvidia/audio-flamingo-next-think).
+"""
 APP_CSS = """
 :root {
   --font-sans: ui-sans-serif, system-ui, sans-serif,
         </div>
         """
     )
+    gr.Markdown(MODEL_GUIDE)
     with gr.Tabs(elem_classes="tab-nav"):
         with gr.Row(elem_classes="panel-row"):

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 git+https://github.com/lashahub/transformers.git@add_AudioFlamingoNext
 accelerate

+# Pinned demo environment. AF-Next is supported in Transformers; this
+# branch matches the exact build used by the Space.
 git+https://github.com/lashahub/transformers.git@add_AudioFlamingoNext
 accelerate