Commit ·
f07bdcd
1
Parent(s): 4b399db
Update prompts
Browse files
app.py
CHANGED
|
@@ -145,6 +145,20 @@ If you need explicit step-by-step timestamp-grounded reasoning traces, use
|
|
| 145 |
PROMPT_NOTE = """
|
| 146 |
> **Prompting note:** AF-Next-Instruct is strongest when the task is explicit. Ask directly for QA, ASR, AST, timestamps, or speaker labels instead of relying on a generic prompt.
|
| 147 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
APP_CSS = """
|
| 149 |
:root {
|
| 150 |
--font-sans: ui-sans-serif, system-ui, sans-serif,
|
|
@@ -305,11 +319,11 @@ EXAMPLE_YOUTUBE_PROMPTS = [
|
|
| 305 |
],
|
| 306 |
[
|
| 307 |
"https://youtu.be/iywaBOMvYLI",
|
| 308 |
-
"
|
| 309 |
],
|
| 310 |
[
|
| 311 |
"https://youtu.be/_mTRvJ9fugM",
|
| 312 |
-
"
|
| 313 |
],
|
| 314 |
]
|
| 315 |
_log_cuda_runtime("startup-before-load")
|
|
@@ -553,6 +567,7 @@ with gr.Blocks(css=APP_CSS, theme=gr.themes.Soft(primary_hue="teal", secondary_h
|
|
| 553 |
)
|
| 554 |
gr.Markdown(MODEL_GUIDE)
|
| 555 |
gr.Markdown(PROMPT_NOTE)
|
|
|
|
| 556 |
|
| 557 |
with gr.Tabs(elem_classes="tab-nav"):
|
| 558 |
with gr.Row(elem_classes="panel-row"):
|
|
|
|
| 145 |
PROMPT_NOTE = """
|
| 146 |
> **Prompting note:** AF-Next-Instruct is strongest when the task is explicit. Ask directly for QA, ASR, AST, timestamps, or speaker labels instead of relying on a generic prompt.
|
| 147 |
"""
|
| 148 |
+
PROMPT_GUIDE_TABLE = """
|
| 149 |
+
### Prompt Guide
|
| 150 |
+
|
| 151 |
+
| Task | Prompt | Recommended Checkpoint(s) |
|
| 152 |
+
| --- | --- | --- |
|
| 153 |
+
| ASR | `Transcribe the input speech.` | `Instruct`, `Think` |
|
| 154 |
+
| AST | `Translate any speech you hear from <src_lang> into <tgt_lang>.` | `Instruct`, `Think` |
|
| 155 |
+
| Short Audio Captioning | `Generate a caption for the input audio.` | `Captioner`, `Think` |
|
| 156 |
+
| Long Audio Captioning | `Generate a detailed caption for the input audio. In the caption, transcribe all spoken content by all speakers in the audio precisely.` | `Captioner`, `Think` |
|
| 157 |
+
| Music Captioning | `Summarize the track with precision: mention its musical style, BPM, key, arrangement, production choices, and the emotions or story it conveys.` | `Captioner`, `Instruct`, `Think` |
|
| 158 |
+
| Lyrics | `Generate a lyrics transcription from the input song.` | `Instruct`, `Captioner`, `Think` |
|
| 159 |
+
| QA | `What precise description did the commentator use for the punch that ended the fight?` | `Instruct`, `Think` |
|
| 160 |
+
| Timestamped Multi-Talker ASR | `Transcribe the input audio. If multiple speakers are present, provide diarized transcripts with speaker labels.`<br>`[Speaker 1] ...`<br>`[Speaker 2] ...` | `Instruct`, `Think` |
|
| 161 |
+
"""
|
| 162 |
APP_CSS = """
|
| 163 |
:root {
|
| 164 |
--font-sans: ui-sans-serif, system-ui, sans-serif,
|
|
|
|
| 319 |
],
|
| 320 |
[
|
| 321 |
"https://youtu.be/iywaBOMvYLI",
|
| 322 |
+
"Summarize the track with precision: mention its musical style, BPM, key, arrangement, production choices, and the emotions or story it conveys.",
|
| 323 |
],
|
| 324 |
[
|
| 325 |
"https://youtu.be/_mTRvJ9fugM",
|
| 326 |
+
"Generate a lyrics transcription from the input song.",
|
| 327 |
],
|
| 328 |
]
|
| 329 |
_log_cuda_runtime("startup-before-load")
|
|
|
|
| 567 |
)
|
| 568 |
gr.Markdown(MODEL_GUIDE)
|
| 569 |
gr.Markdown(PROMPT_NOTE)
|
| 570 |
+
gr.Markdown(PROMPT_GUIDE_TABLE)
|
| 571 |
|
| 572 |
with gr.Tabs(elem_classes="tab-nav"):
|
| 573 |
with gr.Row(elem_classes="panel-row"):
|