qwen3-asr-0.6b-streaming / assets /rtf_combined.svg
qfuxa's picture
Fix combined RTF SVG labels
7b13c8d verified
|
Raw
History Blame Contribute Delete
6.39 kB
<svg xmlns="http://www.w3.org/2000/svg" width="1180" height="680" viewBox="0 0 1180 680" role="img" aria-labelledby="title desc" font-family="Arial, Helvetica, sans-serif">
  <!--
    Grouped bar chart: real-time factor (RTF) of two Qwen3-ASR variants on three devices.
    role="img" exposes only the title and desc below to assistive technology, so the
    desc repeats the plotted values. font-family is set once on the root and inherited
    by every text element.
  -->
  <title id="title">Qwen3-ASR Streaming RTF</title>
  <desc id="desc">Normal Qwen3-ASR is shown in gray and the causal audio tower in violet. Lower real-time factor is faster. Apple M5 with vLLM Metal: 0.262 normal, 0.156 causal, 1.68x faster. NVIDIA H100 with HF Transformers: 0.293 normal, 0.160 causal, 1.83x faster. NVIDIA A100 with vLLM CUDA: 0.146 normal, 0.099 causal, 1.48x faster.</desc>

  <!-- Opaque white canvas so the chart reads the same on any page background. -->
  <rect width="1180" height="680" fill="#FFFFFF"/>

  <!-- Heading and subtitle, centred on x=590 (half of the 1180 px canvas). -->
  <g text-anchor="middle">
    <text x="590" y="54" font-size="34" font-weight="800" fill="#111827">Qwen3-ASR Streaming RTF</text>
    <text x="590" y="86" font-size="17" font-weight="500" fill="#4B5563">ASR compute / audio duration, excluding model load. Lower is faster.</text>
  </g>

  <!-- Legend: gray pill = normal model, violet pill = causal audio variant. -->
  <g font-size="16" font-weight="600">
    <rect x="424" y="93" width="34" height="18" rx="9" fill="#9CA3AF"/>
    <text x="468" y="108" fill="#111827">Qwen3-ASR normal</text>
    <rect x="644" y="93" width="34" height="18" rx="9" fill="#7C3AED"/>
    <text x="688" y="108" fill="#111827">Qwen3 causal audio</text>
  </g>

  <!--
    Horizontal gridlines for RTF 0.1, 0.2 and 0.3, spaced about 97.6 px per 0.1 RTF
    above the y=514 baseline. No separate gridline is drawn for 0.0: the x-axis line
    further down occupies exactly that position.
  -->
  <g stroke="#E5E7EB" stroke-width="1.4">
    <line x1="116" y1="416.4" x2="1106" y2="416.4"/>
    <line x1="116" y1="318.7" x2="1106" y2="318.7"/>
    <line x1="116" y1="221.1" x2="1106" y2="221.1"/>
  </g>

  <!-- Tick labels, right-aligned against x=98 and dropped 6 px to sit on their gridline. -->
  <g text-anchor="end" font-size="15" font-weight="500" fill="#4B5563">
    <text x="98" y="520">0.0</text>
    <text x="98" y="422.4">0.1</text>
    <text x="98" y="324.7">0.2</text>
    <text x="98" y="227.1">0.3</text>
  </g>

  <!-- Axes: x-axis along the y=514 baseline, y-axis at x=116. -->
  <g stroke="#111827" stroke-width="1.6">
    <line x1="116" y1="514" x2="1106" y2="514"/>
    <line x1="116" y1="182" x2="116" y2="514"/>
  </g>

  <!-- Y-axis title, rotated 90 degrees counter-clockwise about its own anchor point. -->
  <text x="34" y="348" text-anchor="middle" font-size="16" font-weight="700" fill="#111827" transform="rotate(-90 34 348)">Streaming inference RTF</text>

  <!--
    Each device group below contains, in order: the gray bar and its value, the violet
    bar and its value, the speedup pill, and the two-line device/backend caption.
    Bars are 76 px wide paths rising from y=514 whose two top corners are rounded with
    quadratic curves of 18 px; the value label sits 12 px above the bar top.
  -->

  <!-- Apple M5, vLLM Metal (group centred on x=281). -->
  <g text-anchor="middle">
    <path d="M194.00,514.00 L194.00,276.55 Q194.00,258.55 212.00,258.55 L252.00,258.55 Q270.00,258.55 270.00,276.55 L270.00,514.00 Z" fill="#9CA3AF"/>
    <text x="232" y="246.6" font-size="17" font-weight="800" fill="#111827">0.262</text>
    <path d="M292.00,514.00 L292.00,379.61 Q292.00,361.61 310.00,361.61 L350.00,361.61 Q368.00,361.61 368.00,379.61 L368.00,514.00 Z" fill="#7C3AED"/>
    <text x="330" y="349.6" font-size="17" font-weight="800" fill="#111827">0.156</text>
    <rect x="214" y="136" width="134" height="30" rx="15" fill="#F3E8FF"/>
    <text x="281" y="157" font-size="15" font-weight="800" fill="#6D28D9">1.68x faster</text>
    <text x="281" y="552" font-size="19" font-weight="700" fill="#111827">Apple M5</text>
    <text x="281" y="578" font-size="17" font-weight="600" fill="#4B5563">vLLM Metal</text>
  </g>

  <!-- NVIDIA H100, HF Transformers (group centred on x=611). -->
  <g text-anchor="middle">
    <path d="M524.00,514.00 L524.00,246.09 Q524.00,228.09 542.00,228.09 L582.00,228.09 Q600.00,228.09 600.00,246.09 L600.00,514.00 Z" fill="#9CA3AF"/>
    <text x="562" y="216.1" font-size="17" font-weight="800" fill="#111827">0.293</text>
    <path d="M622.00,514.00 L622.00,375.58 Q622.00,357.58 640.00,357.58 L680.00,357.58 Q698.00,357.58 698.00,375.58 L698.00,514.00 Z" fill="#7C3AED"/>
    <text x="660" y="345.6" font-size="17" font-weight="800" fill="#111827">0.160</text>
    <rect x="544" y="136" width="134" height="30" rx="15" fill="#F3E8FF"/>
    <text x="611" y="157" font-size="15" font-weight="800" fill="#6D28D9">1.83x faster</text>
    <text x="611" y="552" font-size="19" font-weight="700" fill="#111827">NVIDIA H100</text>
    <text x="611" y="578" font-size="17" font-weight="600" fill="#4B5563">HF Transformers</text>
  </g>

  <!-- NVIDIA A100, vLLM CUDA (group centred on x=941). -->
  <g text-anchor="middle">
    <path d="M854.00,514.00 L854.00,389.33 Q854.00,371.33 872.00,371.33 L912.00,371.33 Q930.00,371.33 930.00,389.33 L930.00,514.00 Z" fill="#9CA3AF"/>
    <text x="892" y="359.3" font-size="17" font-weight="800" fill="#111827">0.146</text>
    <path d="M952.00,514.00 L952.00,435.39 Q952.00,417.39 970.00,417.39 L1010.00,417.39 Q1028.00,417.39 1028.00,435.39 L1028.00,514.00 Z" fill="#7C3AED"/>
    <text x="990" y="405.4" font-size="17" font-weight="800" fill="#111827">0.099</text>
    <rect x="874" y="136" width="134" height="30" rx="15" fill="#F3E8FF"/>
    <text x="941" y="157" font-size="15" font-weight="800" fill="#6D28D9">1.48x faster</text>
    <text x="941" y="552" font-size="19" font-weight="700" fill="#111827">NVIDIA A100</text>
    <text x="941" y="578" font-size="17" font-weight="600" fill="#4B5563">vLLM CUDA</text>
  </g>

  <!-- Footnotes. Each clause of the second line names a device, matching the group captions. -->
  <g text-anchor="middle" font-size="15" font-weight="500" fill="#4B5563">
    <text x="590" y="622">Live streaming, no past rewrite, 250 ms holdback; model load excluded.</text>
    <text x="590" y="648">H100 uses HF Transformers/CUDA; A100 uses vLLM CUDA; M5 uses vLLM Metal.</text>
  </g>
</svg>