#!/usr/bin/env node
/**
 * Fake-microphone benchmark harness.
 *
 * Flow (see `main`):
 *   1. Verify the app at `url` is reachable.
 *   2. Unless an existing fixture is reused, launch headless Chrome, ask the
 *      page's `window.browserSpeakBench` API to synthesize the prompt, and
 *      write the audio as a 48 kHz mono 16-bit WAV file.
 *   3. Relaunch Chrome with that WAV as the fake audio-capture device, run
 *      three scripted microphone rows, and write the exported results (or a
 *      failure payload) as JSON to `resultPath`.
 *
 * Chrome is driven through its remote-debugging HTTP endpoints and a
 * DevTools WebSocket (`CdpClient`). The script relies on the global
 * `fetch` and `WebSocket` provided by the Node.js runtime.
 */
import { spawn } from "node:child_process";
import { existsSync } from "node:fs";
import { mkdir, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { dirname, resolve } from "node:path";
import { sourceFingerprint } from "./source-fingerprint.mjs";

/**
 * Reads a numeric setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is not set.
 * @returns {number} The parsed value, or `fallback` when unset.
 * @throws {Error} When the variable is set but does not parse to a finite
 *   number. Without this check a typo would become `NaN`, which makes every
 *   `Date.now() < deadline` loop exit immediately with a misleading timeout.
 */
function numberFromEnv(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined) return fallback;
  const value = Number(raw);
  if (!Number.isFinite(value)) {
    throw new Error(`${name} must be a finite number, got ${JSON.stringify(raw)}.`);
  }
  return value;
}

// ---------------------------------------------------------------------------
// Configuration (all overridable through environment variables)
// ---------------------------------------------------------------------------

// Address of the app under test.
const url = process.env.BROWSER_SPEAK_URL ?? "http://127.0.0.1:5174/";

// Chrome binary: explicit override, then two well-known install paths, then
// whatever `chromium` resolves to on PATH.
const chrome =
  process.env.CHROME_BIN ??
  (existsSync("/opt/google/chrome/chrome")
    ? "/opt/google/chrome/chrome"
    : existsSync("/usr/bin/google-chrome")
      ? "/usr/bin/google-chrome"
      : "chromium");

// Text that is synthesized into the fake-microphone fixture.
const prompt = process.env.BROWSER_SPEAK_PROMPT ?? "What app is this?";

const fixturePath = resolve(
  process.env.BROWSER_SPEAK_FAKE_MIC_WAV ?? `${tmpdir()}/browser-speak-what-app-is-this.wav`,
);
const resultPath = resolve(
  process.env.BROWSER_SPEAK_FAKE_MIC_JSON ?? `${tmpdir()}/browser-speak-fake-mic-results.json`,
);
const profileDir = resolve(
  process.env.BROWSER_SPEAK_PROFILE_DIR ?? `${tmpdir()}/browser-speak-benchmark-profile`,
);

// When "true" and the fixture file already exists, skip re-synthesizing it.
const reuseFixture = process.env.BROWSER_SPEAK_REUSE_FAKE_MIC_WAV === "true";

// Default timeout for a single DevTools protocol call.
const protocolTimeoutMs = numberFromEnv("BROWSER_SPEAK_CDP_TIMEOUT_MS", 60000);
// Timeout for the short polling/evaluate calls made while a page task runs.
const pollTimeoutMs = numberFromEnv("BROWSER_SPEAK_CDP_POLL_TIMEOUT_MS", 5000);
// Timeout handed to the page's `runMic` call.
const fakeMicTurnTimeoutMs = numberFromEnv("BROWSER_SPEAK_FAKE_MIC_TURN_TIMEOUT_MS", 80000);
// Harness-side timeout for one row; defaults to the turn timeout plus 30 s.
const fakeMicRowTimeoutMs = numberFromEnv(
  "BROWSER_SPEAK_FAKE_MIC_ROW_TIMEOUT_MS",
  fakeMicTurnTimeoutMs + 30000,
);

// Options passed verbatim (as JSON) to `window.browserSpeakBench.loadStack`.
const stack = {
  device: process.env.BROWSER_SPEAK_DEVICE ?? "wasm",
  llm: process.env.BROWSER_SPEAK_LLM ?? "HuggingFaceTB/SmolLM2-135M-Instruct",
  asr: process.env.BROWSER_SPEAK_ASR ?? "onnx-community/moonshine-base-ONNX",
  voice: process.env.BROWSER_SPEAK_VOICE ?? "F2",
  ttsSteps: numberFromEnv("BROWSER_SPEAK_TTS_STEPS", 2),
  vadSilenceMs: numberFromEnv("BROWSER_SPEAK_VAD_SILENCE_MS", 480),
  partialAsr: process.env.BROWSER_SPEAK_PARTIAL_ASR !== "false",
};

// ---------------------------------------------------------------------------
// Orchestration
// ---------------------------------------------------------------------------

/**
 * Runs the whole benchmark: fixture creation (unless reused), three scripted
 * fake-microphone rows, and the JSON export.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the server is unreachable, Chrome cannot be driven, a
 *   page task fails or times out, or a row is missing / reports an error.
 *   When a DevTools client exists, a failure payload is written to
 *   `resultPath` before the error propagates.
 */
async function main() {
  await ensureServer();
  await mkdir(dirname(fixturePath), { recursive: true });
  await mkdir(dirname(resultPath), { recursive: true });
  // Start from a clean browser profile.
  await rm(profileDir, { recursive: true, force: true });

  if (reuseFixture && existsSync(fixturePath)) {
    console.log(`Reusing fake microphone fixture: ${fixturePath}`);
  } else {
    await createFixture();
  }

  const fakeMicBrowser = launchBrowser(9342, profileDir, [
    "--use-fake-ui-for-media-stream",
    "--use-fake-device-for-media-stream",
    `--use-file-for-fake-audio-capture=${fixturePath}`,
  ]);
  let client = null;
  try {
    client = await connectToPage(9342, fakeMicBrowser);
    await waitForBenchApi(client);
    await runPageTask(client, `window.browserSpeakBench.loadStack(${JSON.stringify(stack)})`, {
      label: "fake-mic model load",
      timeoutMs: 240000,
    });
    await runPageTask(client, "window.browserSpeakBench.clearResults()", { label: "clear results" });

    for (let i = 1; i <= 3; i += 1) {
      console.log(`Running fake microphone scripted row ${i}/3`);
      const snapshot = await runPageTask(
        client,
        `window.browserSpeakBench.runMic(${JSON.stringify({
          timeoutMs: fakeMicTurnTimeoutMs,
          stopMicAfterTranscript: true,
          requireExactTranscript: true,
        })})`,
        {
          label: `fake microphone row ${i}`,
          timeoutMs: fakeMicRowTimeoutMs,
        },
      );
      // The row that just ran is read from the front of the results list.
      const latest = snapshot.results[0];
      if (!latest || latest.kind !== "mic" || latest.error) {
        // The catch block below writes the failure payload exactly once.
        throw new Error(latest?.error || `Missing fake microphone row ${i}.`);
      }
      await runPageTask(client, "window.browserSpeakBench.stopMic()", { label: "stop microphone" });
    }

    const exportPayload = await runPageTask(client, "window.browserSpeakBench.exportResults()", {
      label: "export results",
    });
    exportPayload.sourceFingerprint = await sourceFingerprint();
    await writeFile(resultPath, `${JSON.stringify(exportPayload, null, 2)}\n`);
    printSummary(exportPayload);
    await client.closeBrowser();
  } catch (error) {
    if (client) {
      // Best effort: a failure while recording the failure must not mask the
      // original error, but it should not vanish silently either.
      await writeFailurePayload(client, error.message).catch((writeError) => {
        console.warn(`Could not write fake microphone failure JSON: ${writeError?.message ?? writeError}`);
      });
    }
    throw error;
  } finally {
    await stopBrowser(fakeMicBrowser, profileDir);
  }
}

/**
 * Synthesizes `prompt` in a throwaway browser session and writes the result
 * to `fixturePath` as a WAV file. The profile directory is kept afterwards
 * (`cleanupProfile: false`) so the follow-up browser launch can reuse it.
 *
 * @returns {Promise<void>}
 */
async function createFixture() {
  const fixtureBrowser = launchBrowser(9341, profileDir);
  try {
    const client = await connectToPage(9341, fixtureBrowser);
    await waitForBenchApi(client);
    await runPageTask(client, `window.browserSpeakBench.loadStack(${JSON.stringify(stack)})`, {
      label: "fixture model load",
      timeoutMs: 240000,
    });
    const fixture = await runPageTask(
      client,
      `window.browserSpeakBench.synthesizeAudio(${JSON.stringify({ text: prompt, voice: stack.voice, steps: 2, speed: 1.05 })})`,
      {
        label: "fixture synthesis",
        timeoutMs: 90000,
      },
    );
    await writeFile(fixturePath, wavBuffer(fixture.audio, fixture.sampleRate));
    await client.closeBrowser();
    console.log(`Wrote fake microphone fixture: ${fixturePath}`);
  } finally {
    await stopBrowser(fixtureBrowser, profileDir, { cleanupProfile: false });
  }
}

/**
 * Prints the headline numbers of an exported benchmark payload.
 *
 * @param {object} exportPayload - Value returned by the page's
 *   `exportResults()`; reads `summary.current` (falling back to
 *   `summary.all`) and `results`.
 */
function printSummary(exportPayload) {
  const current = exportPayload.summary.current ?? exportPayload.summary.all;
  const micRows = exportPayload.results.filter((result) => result.kind === "mic" && !result.error);
  console.log(`Wrote fake microphone benchmark JSON: ${resultPath}`);
  console.log(`Scripted mic rows: ${current.micRuns}/${current.micTargetRuns}`);
  console.log(`Median mic ASR: ${formatMs(median(micRows.map((result) => result.asrMs)))}`);
  console.log(`Median mic WER: ${formatPercent(current.micMedianWer)}`);
  console.log(`Median mic first token: ${formatMs(median(micRows.map((result) => result.firstTokenMs)))}`);
  console.log(`Median mic speech-end-to-audio: ${formatMs(current.micMedianSpeechEndToFirstAudioMs)}`);
  console.log(`Median mic speech-end-to-audio-done: ${formatMs(current.micMedianSpeechEndToAudioEndMs)}`);
}

/**
 * Writes a failure record to `resultPath`, including the page's current
 * benchmark state when it can still be read.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @param {string} error - Human-readable failure description.
 * @returns {Promise<void>}
 */
async function writeFailurePayload(client, error) {
  // Both lookups are optional extras; failure to obtain them yields null.
  const snapshot = await client
    .evaluate("window.browserSpeakBench?.state?.() ?? null", pollTimeoutMs)
    .catch(() => null);
  const fingerprint = await sourceFingerprint().catch(() => null);
  await writeFile(
    resultPath,
    `${JSON.stringify(
      snapshot
        ? { error, sourceFingerprint: fingerprint, snapshot }
        : { error, sourceFingerprint: fingerprint },
      null,
      2,
    )}\n`,
  );
  console.log(`Wrote fake microphone failure JSON: ${resultPath}`);
}

/**
 * Confirms the app under test answers at `url`.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the request fails or returns a non-2xx status.
 */
async function ensureServer() {
  const response = await fetch(url).catch((error) => {
    throw new Error(`Could not reach ${url}: ${error.message}`);
  });
  if (!response.ok) throw new Error(`${url} returned HTTP ${response.status}`);
}

/**
 * Polls the page (every 100 ms, for up to 15 s) until
 * `window.browserSpeakBench` is defined.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @returns {Promise<void>}
 * @throws {Error} When the API does not appear before the deadline.
 */
async function waitForBenchApi(client) {
  const deadline = Date.now() + 15000;
  while (Date.now() < deadline) {
    try {
      if (await client.evaluate("Boolean(window.browserSpeakBench)")) return;
    } catch {
      // The target may still be navigating and can destroy the execution context.
    }
    await sleep(100);
  }
  throw new Error("window.browserSpeakBench was not installed.");
}

/**
 * Starts a (possibly asynchronous) expression in the page and polls for its
 * outcome instead of holding one long protocol call open.
 *
 * The expression's settled result is parked under a unique id in
 * `window.__browserSpeakHarnessTasks`; the harness then polls that slot every
 * 500 ms. While waiting it also echoes the newest entry of the page's
 * `state().events` whenever the first three events change.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @param {string} expression - JavaScript source evaluated in the page.
 * @param {object} [options]
 * @param {string} [options.label="page task"] - Name used in logs and errors.
 * @param {number} [options.timeoutMs=30000] - Overall deadline for the task.
 * @returns {Promise<*>} The value the expression resolved to.
 * @throws {Error} When the task rejects in the page or the deadline passes.
 */
async function runPageTask(client, expression, { label = "page task", timeoutMs = 30000 } = {}) {
  const taskId = `task_${Date.now()}_${Math.random().toString(16).slice(2)}`;
  await client.evaluate(`(() => {
    const taskId = ${JSON.stringify(taskId)};
    window.__browserSpeakHarnessTasks ||= {};
    window.__browserSpeakHarnessTasks[taskId] = { done: false, label: ${JSON.stringify(label)} };
    Promise.resolve(${expression})
      .then((value) => {
        window.__browserSpeakHarnessTasks[taskId] = { done: true, value };
      })
      .catch((error) => {
        window.__browserSpeakHarnessTasks[taskId] = {
          done: true,
          error: error?.stack || error?.message || String(error),
        };
      });
    return true;
  })()`);

  const deadline = Date.now() + timeoutMs;
  let lastEvents = [];
  let lastPollError = "";
  while (Date.now() < deadline) {
    let task = null;
    try {
      task = await client.evaluate(
        `window.__browserSpeakHarnessTasks?.[${JSON.stringify(taskId)}] ?? null`,
        pollTimeoutMs,
      );
      lastPollError = "";
    } catch (error) {
      // A busy page may not answer within the poll timeout; keep waiting and
      // log each distinct reason only once.
      const message = error.message ?? String(error);
      if (message !== lastPollError) {
        console.log(`${label}: waiting for page response (${message})`);
        lastPollError = message;
      }
      await sleep(500);
      continue;
    }

    if (task?.done) {
      // Best-effort cleanup of the parked result.
      await client
        .evaluate(`delete window.__browserSpeakHarnessTasks?.[${JSON.stringify(taskId)}]`, pollTimeoutMs)
        .catch(() => {});
      if (task.error) throw new Error(`${label} failed: ${task.error}`);
      return task.value;
    }

    const snapshot = await client
      .evaluate(`(() => {
        const state = window.browserSpeakBench?.state?.();
        return state ? {
          modelsLoaded: state.modelsLoaded,
          modelsLoading: state.modelsLoading,
          activeBenchmark: state.activeBenchmark?.kind ?? null,
          events: state.events?.slice(0, 3) ?? [],
        } : null;
      })()`, pollTimeoutMs)
      .catch(() => null);
    const events = snapshot?.events ?? [];
    // The joined strings are only compared with each other, never printed.
    if (events.join("\\n") !== lastEvents.join("\\n")) {
      lastEvents = events;
      if (events[0]) console.log(`${label}: ${events[0]}`);
    }
    await sleep(500);
  }
  throw new Error(`${label} timed out after ${(timeoutMs / 1000).toFixed(0)} seconds.`);
}

// ---------------------------------------------------------------------------
// Browser process management
// ---------------------------------------------------------------------------

/**
 * Spawns headless Chrome with remote debugging enabled.
 *
 * The returned child process carries a `browserLog` string holding the last
 * 8000 characters of its stdout/stderr (plus any spawn error), which is
 * appended to connection error messages.
 *
 * @param {number} port - Remote debugging port.
 * @param {string} profileDir - Value for `--user-data-dir`.
 * @param {string[]} [extraArgs=[]] - Additional Chrome flags.
 * @returns {import("node:child_process").ChildProcess} The spawned process.
 */
function launchBrowser(port, profileDir, extraArgs = []) {
  const child = spawn(
    chrome,
    [
      "--headless=new",
      "--no-sandbox",
      "--disable-gpu",
      "--disable-dev-shm-usage",
      "--disable-background-networking",
      "--disable-extensions",
      "--no-default-browser-check",
      "--no-first-run",
      "--autoplay-policy=no-user-gesture-required",
      `--remote-debugging-port=${port}`,
      `--user-data-dir=${profileDir}`,
      ...extraArgs,
    ],
    { stdio: ["ignore", "pipe", "pipe"] },
  );
  child.browserLog = "";
  const appendLog = (chunk) => {
    child.browserLog = `${child.browserLog}${chunk}`;
    if (child.browserLog.length > 8000) child.browserLog = child.browserLog.slice(-8000);
  };
  child.stdout.on("data", appendLog);
  child.stderr.on("data", appendLog);
  // Without a listener, a spawn failure (e.g. missing binary) is raised as an
  // uncaught exception outside the callers' try/finally cleanup. Record it so
  // connectToPage can report it together with the rest of the log.
  child.on("error", (error) => {
    appendLog(`Failed to run ${chrome}: ${error.message}\n`);
  });
  return child;
}

/**
 * Reports whether a child process has terminated, either with an exit code
 * or because of a signal (in which case Node leaves `exitCode` null and sets
 * `signalCode`).
 *
 * @param {import("node:child_process").ChildProcess} child
 * @returns {boolean}
 */
function hasExited(child) {
  return child.exitCode != null || child.signalCode != null;
}

/**
 * Waits for a child process to emit "exit", up to `timeoutMs`.
 *
 * @param {import("node:child_process").ChildProcess} child
 * @param {number} timeoutMs - Maximum time to wait.
 * @returns {Promise<boolean>} True if the process has exited, false on timeout.
 */
function waitForExit(child, timeoutMs) {
  if (hasExited(child)) return Promise.resolve(true);
  return new Promise((resolveWait) => {
    const onExit = () => {
      clearTimeout(timer);
      resolveWait(true);
    };
    const timer = setTimeout(() => {
      child.off("exit", onExit);
      resolveWait(false);
    }, timeoutMs);
    child.once("exit", onExit);
  });
}

/**
 * Terminates a browser started by `launchBrowser` and optionally removes its
 * profile directory.
 *
 * Sends SIGTERM, waits up to 3 s, then escalates to SIGKILL. Profile removal
 * is retried up to five times, 500 ms apart; a final failure is logged as a
 * warning rather than thrown.
 *
 * @param {import("node:child_process").ChildProcess} child - Browser process.
 * @param {string} profileDir - Profile directory to remove.
 * @param {object} [options]
 * @param {boolean} [options.cleanupProfile=true] - Remove `profileDir`.
 * @returns {Promise<void>}
 */
async function stopBrowser(child, profileDir, { cleanupProfile = true } = {}) {
  if (!hasExited(child)) {
    child.kill("SIGTERM");
    const exitedGracefully = await waitForExit(child, 3000);
    if (!exitedGracefully) {
      child.kill("SIGKILL");
      // Give the kill a moment to land so the profile files are released.
      await waitForExit(child, 1000);
    }
  }
  if (!cleanupProfile) return;

  for (let attempt = 0; attempt < 5; attempt += 1) {
    try {
      await rm(profileDir, { recursive: true, force: true });
      return;
    } catch (error) {
      if (attempt === 4) {
        console.warn(`Could not remove ${profileDir}: ${error.message}`);
        return;
      }
      await sleep(500);
    }
  }
}

/**
 * Waits (up to 60 s, polling every 250 ms) for Chrome's DevTools HTTP
 * endpoint, opens a page target for `url`, and returns a client for it.
 *
 * @param {number} port - Remote debugging port.
 * @param {import("node:child_process").ChildProcess} child - Browser process,
 *   used to detect an early exit and to attach its log to errors.
 * @returns {Promise<CdpClient>} Client bound to the page's WebSocket.
 * @throws {Error} When Chrome exits first or the deadline passes.
 */
async function connectToPage(port, child) {
  const deadline = Date.now() + 60000;
  let lastError = null;
  while (Date.now() < deadline) {
    if (hasExited(child)) {
      throw new Error(`Chrome exited before DevTools became available.\n${child.browserLog}`);
    }
    try {
      const version = await fetch(`http://127.0.0.1:${port}/json/version`).then((response) => response.json());
      if (version.webSocketDebuggerUrl) {
        const page = await createPageTarget(port);
        return new CdpClient(page.webSocketDebuggerUrl);
      }
    } catch (error) {
      lastError = error;
    }
    await sleep(250);
  }
  throw new Error(
    `Could not connect to Chrome DevTools on port ${port}: ${lastError?.message ?? "unknown error"}\n${child.browserLog}`,
  );
}

/**
 * Opens a new page target pointing at `url` via `/json/new`, trying PUT and
 * then GET. If neither yields a target, falls back to an existing page target
 * whose URL equals `url`.
 *
 * @param {number} port - Remote debugging port.
 * @returns {Promise<object>} Target description including
 *   `webSocketDebuggerUrl`.
 * @throws {Error} When no suitable target can be created or found.
 */
async function createPageTarget(port) {
  for (const method of ["PUT", "GET"]) {
    const response = await fetch(`http://127.0.0.1:${port}/json/new?${encodeURIComponent(url)}`, {
      method,
    }).catch(() => null);
    if (response?.ok) {
      const target = await response.json();
      if (target.webSocketDebuggerUrl) return target;
    }
  }

  const targets = await fetch(`http://127.0.0.1:${port}/json`).then((response) => response.json());
  const page = targets.find((target) => target.type === "page" && target.url === url);
  if (page?.webSocketDebuggerUrl) return page;
  throw new Error("Could not create or find a page target.");
}

// ---------------------------------------------------------------------------
// DevTools protocol client
// ---------------------------------------------------------------------------

/**
 * Minimal DevTools-protocol client over a WebSocket: numbered requests,
 * responses matched by id, per-call timeouts.
 */
class CdpClient {
  /**
   * @param {string} webSocketUrl - Target's `webSocketDebuggerUrl`.
   */
  constructor(webSocketUrl) {
    this.nextId = 1;
    // id -> { resolve, reject } for calls awaiting a response.
    this.pending = new Map();
    this.socket = new WebSocket(webSocketUrl);
    this.opened = new Promise((resolveOpened, rejectOpened) => {
      this.socket.onopen = resolveOpened;
      // Reject with a real Error so callers can rely on `.message`.
      this.socket.onerror = (event) => {
        rejectOpened(new Error(event?.message || "DevTools WebSocket connection failed."));
      };
      this.socket.onmessage = (event) => this.onMessage(event);
      // Fail in-flight calls right away instead of letting each one run into
      // its own timeout when the connection is gone.
      this.socket.onclose = () => this.rejectPending(new Error("DevTools WebSocket closed."));
    });
  }

  /**
   * Routes a protocol response to the call waiting for it. Messages without
   * an id, or with an id nobody is waiting for, are ignored.
   *
   * @param {MessageEvent} event - WebSocket message carrying JSON text.
   */
  onMessage(event) {
    const message = JSON.parse(event.data);
    if (!message.id || !this.pending.has(message.id)) return;
    const { resolve: onResolve, reject } = this.pending.get(message.id);
    this.pending.delete(message.id);
    if (message.error) reject(new Error(message.error.message));
    else onResolve(message.result);
  }

  /**
   * Rejects every call still awaiting a response.
   *
   * @param {Error} error - Reason passed to each pending call.
   */
  rejectPending(error) {
    const waiting = [...this.pending.values()];
    this.pending.clear();
    for (const { reject } of waiting) reject(error);
  }

  /**
   * Sends one protocol request and waits for its response.
   *
   * @param {string} method - Protocol method name.
   * @param {object} [params={}] - Method parameters.
   * @param {number} [timeoutMs=protocolTimeoutMs] - Response deadline.
   * @returns {Promise<object>} The response's `result`.
   * @throws {Error} On protocol error, closed socket, or timeout.
   */
  async call(method, params = {}, timeoutMs = protocolTimeoutMs) {
    await this.opened;
    if (this.socket.readyState !== WebSocket.OPEN) {
      throw new Error(`${method} failed: DevTools WebSocket is not open.`);
    }
    const id = this.nextId++;
    this.socket.send(JSON.stringify({ id, method, params }));
    return new Promise((resolvePromise, reject) => {
      const timer = setTimeout(() => {
        this.pending.delete(id);
        reject(new Error(`${method} timed out after ${(timeoutMs / 1000).toFixed(0)} seconds.`));
      }, timeoutMs);
      this.pending.set(id, {
        resolve: (value) => {
          clearTimeout(timer);
          resolvePromise(value);
        },
        reject: (error) => {
          clearTimeout(timer);
          reject(error);
        },
      });
    });
  }

  /**
   * Evaluates an expression in the page and returns its value by value.
   * Promises are NOT awaited; see `evaluateAsync`.
   *
   * @param {string} expression - JavaScript source.
   * @param {number} [timeoutMs=protocolTimeoutMs] - Response deadline.
   * @returns {Promise<*>} The expression's value.
   * @throws {Error} When the expression throws in the page.
   */
  async evaluate(expression, timeoutMs = protocolTimeoutMs) {
    const result = await this.call("Runtime.evaluate", {
      expression,
      returnByValue: true,
    }, timeoutMs);
    if (result.exceptionDetails) throw new Error(formatException(result.exceptionDetails));
    return result.result.value;
  }

  /**
   * Like `evaluate`, but asks the page to await a returned promise first.
   *
   * @param {string} expression - JavaScript source.
   * @param {number} [timeoutMs=protocolTimeoutMs] - Response deadline.
   * @returns {Promise<*>} The settled value.
   * @throws {Error} When the expression throws or the promise rejects.
   */
  async evaluateAsync(expression, timeoutMs = protocolTimeoutMs) {
    const result = await this.call("Runtime.evaluate", {
      expression,
      awaitPromise: true,
      returnByValue: true,
    }, timeoutMs);
    if (result.exceptionDetails) throw new Error(formatException(result.exceptionDetails));
    return result.result.value;
  }

  /**
   * Fire-and-forget shutdown: sends `Browser.close` without waiting for a
   * response, then closes the socket. Errors from either step are ignored.
   */
  closeBrowser() {
    try {
      this.socket.send(JSON.stringify({ id: this.nextId++, method: "Browser.close", params: {} }));
    } catch {
      // The surrounding process cleanup handles already-closed targets.
    }
    try {
      this.socket.close();
    } catch {
      // Ignore close races.
    }
  }
}

/**
 * Picks the most descriptive text available from a protocol
 * `exceptionDetails` object.
 *
 * @param {object} exceptionDetails - As returned by `Runtime.evaluate`.
 * @returns {*} `exception.description`, else `exception.value`, else
 *   `text`, else "Evaluation failed.".
 */
function formatException(exceptionDetails) {
  const exception = exceptionDetails.exception;
  return exception?.description ?? exception?.value ?? exceptionDetails.text ?? "Evaluation failed.";
}

// ---------------------------------------------------------------------------
// Audio helpers
// ---------------------------------------------------------------------------

/**
 * Encodes samples as a 48 kHz mono 16-bit PCM WAV file, with two seconds of
 * silence before and after the (resampled, normalized) speech.
 *
 * @param {ArrayLike<number>} samples - Source samples; values are clamped to
 *   [-1, 1] when written.
 * @param {number} sourceSampleRate - Sample rate of `samples` in Hz.
 * @returns {Buffer} Complete WAV file contents (44-byte header + data).
 */
function wavBuffer(samples, sourceSampleRate) {
  const targetRate = 48000;
  const lead = new Float32Array(Math.round(targetRate * 2.0));
  const speech = normalizeSpeech(resample(samples, sourceSampleRate, targetRate));
  const trail = new Float32Array(Math.round(targetRate * 2.0));
  const combined = new Float32Array(lead.length + speech.length + trail.length);
  combined.set(lead, 0);
  combined.set(speech, lead.length);
  combined.set(trail, lead.length + speech.length);

  const dataBytes = combined.length * 2;
  const buffer = Buffer.alloc(44 + dataBytes);
  buffer.write("RIFF", 0);
  buffer.writeUInt32LE(36 + dataBytes, 4); // RIFF chunk size
  buffer.write("WAVE", 8);
  buffer.write("fmt ", 12);
  buffer.writeUInt32LE(16, 16); // fmt chunk size
  buffer.writeUInt16LE(1, 20); // audio format: PCM
  buffer.writeUInt16LE(1, 22); // channels
  buffer.writeUInt32LE(targetRate, 24); // sample rate
  buffer.writeUInt32LE(targetRate * 2, 28); // byte rate
  buffer.writeUInt16LE(2, 32); // block align
  buffer.writeUInt16LE(16, 34); // bits per sample
  buffer.write("data", 36);
  buffer.writeUInt32LE(dataBytes, 40);
  for (let i = 0; i < combined.length; i += 1) {
    const value = Math.max(-1, Math.min(1, combined[i]));
    buffer.writeInt16LE(Math.round(value * 32767), 44 + i * 2);
  }
  return buffer;
}

/**
 * Scales samples so the peak lands at 0.85, with the gain capped at 3x.
 * Input whose peak is below 0.01 is returned untouched (same object).
 *
 * @param {Float32Array} samples
 * @returns {Float32Array} Scaled copy, or `samples` itself when near-silent.
 */
function normalizeSpeech(samples) {
  let peak = 0;
  for (const sample of samples) {
    peak = Math.max(peak, Math.abs(sample));
  }
  if (peak < 0.01) return samples;
  const gain = Math.min(3, 0.85 / peak);
  const output = new Float32Array(samples.length);
  for (let i = 0; i < samples.length; i += 1) {
    output[i] = samples[i] * gain;
  }
  return output;
}

/**
 * Converts samples to a different sample rate using linear interpolation
 * between neighbouring source samples.
 *
 * @param {ArrayLike<number>} samples - Source samples.
 * @param {number} sourceRate - Rate of `samples` in Hz.
 * @param {number} targetRate - Desired rate in Hz.
 * @returns {Float32Array} New array (a copy even when the rates match).
 */
function resample(samples, sourceRate, targetRate) {
  if (sourceRate === targetRate) return Float32Array.from(samples);
  const output = new Float32Array(Math.ceil((samples.length * targetRate) / sourceRate));
  for (let i = 0; i < output.length; i += 1) {
    const position = (i * sourceRate) / targetRate;
    const left = Math.floor(position);
    // Clamp so the final output sample does not read past the input.
    const right = Math.min(samples.length - 1, left + 1);
    const weight = position - left;
    output[i] = samples[left] * (1 - weight) + samples[right] * weight;
  }
  return output;
}

// ---------------------------------------------------------------------------
// Formatting and small utilities
// ---------------------------------------------------------------------------

/**
 * Formats a duration: whole milliseconds below one second, otherwise seconds
 * with two decimals. Non-finite input (including null) renders as "-".
 *
 * @param {*} value - Duration in milliseconds.
 * @returns {string}
 */
function formatMs(value) {
  if (!Number.isFinite(value)) return "-";
  if (value < 1000) return `${Math.round(value)} ms`;
  return `${(value / 1000).toFixed(2)} s`;
}

/**
 * Formats a ratio as a whole-number percentage; non-finite input renders
 * as "-".
 *
 * @param {*} value - Ratio, where 1 means 100%.
 * @returns {string}
 */
function formatPercent(value) {
  if (!Number.isFinite(value)) return "-";
  return `${Math.round(value * 100)}%`;
}

/**
 * Median of the finite numbers in `values`; other entries are ignored.
 *
 * @param {Array<*>} values
 * @returns {number|null} The median, or null when no finite number exists.
 */
function median(values) {
  const finite = values.filter(Number.isFinite).sort((a, b) => a - b);
  if (finite.length === 0) return null;
  const middle = Math.floor(finite.length / 2);
  if (finite.length % 2 === 1) return finite[middle];
  return (finite[middle - 1] + finite[middle]) / 2;
}

/**
 * Resolves after `ms` milliseconds.
 *
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolveSleep) => setTimeout(resolveSleep, ms));
}

await main();