#!/usr/bin/env node
/**
 * Local candidate benchmark harness.
 *
 * Builds a list of model "stacks" from environment variables, launches Chrome
 * with a remote-debugging port, opens the benchmark page and drives the
 * `window.browserSpeakBench` API through `Runtime.evaluate` calls over the
 * DevTools WebSocket. After every candidate the collected results are written
 * to `resultPath` as JSON, so a crash part-way through still leaves a usable
 * file on disk.
 *
 * With BROWSER_SPEAK_LOCAL_DRY_RUN=true only the planned stacks are written and
 * neither the server nor Chrome is touched.
 */
import { spawn } from "node:child_process";
import { existsSync } from "node:fs";
import { mkdir, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { dirname, resolve } from "node:path";

// Port passed to Chrome via --remote-debugging-port and used for all DevTools HTTP calls.
const devtoolsPort = 9344;

const url = process.env.BROWSER_SPEAK_URL ?? "http://127.0.0.1:5174/";
const chrome =
  process.env.CHROME_BIN ??
  (existsSync("/opt/google/chrome/chrome")
    ? "/opt/google/chrome/chrome"
    : existsSync("/usr/bin/google-chrome")
      ? "/usr/bin/google-chrome"
      : "chromium");
const resultPath = resolve(
  process.env.BROWSER_SPEAK_LOCAL_JSON ?? `${tmpdir()}/browser-speak-local-candidates.json`,
);
const profileDir = resolve(
  process.env.BROWSER_SPEAK_LOCAL_PROFILE_DIR ?? `${tmpdir()}/browser-speak-local-profile`,
);
const protocolTimeoutMs = envNumber("BROWSER_SPEAK_CDP_TIMEOUT_MS", 60000);
const pollTimeoutMs = envNumber("BROWSER_SPEAK_CDP_POLL_TIMEOUT_MS", 5000);
const pageUnresponsiveTimeoutMs = envNumber("BROWSER_SPEAK_PAGE_UNRESPONSIVE_TIMEOUT_MS", 120000);
const loadTimeoutMs = envNumber("BROWSER_SPEAK_LOAD_TIMEOUT_MS", 360000);
const taskTimeoutMs = envNumber("BROWSER_SPEAK_TASK_TIMEOUT_MS", 180000);
const ttsWarmup = process.env.BROWSER_SPEAK_TTS_WARMUP === "true";
const reuseProfile = process.env.BROWSER_SPEAK_LOCAL_REUSE_PROFILE === "true";
const headless = process.env.BROWSER_SPEAK_HEADLESS !== "false";
const dryRun = process.env.BROWSER_SPEAK_LOCAL_DRY_RUN === "true";
const localTasks = parseList(process.env.BROWSER_SPEAK_LOCAL_TASKS ?? "tts,loopback");
const stacks = buildStacks().slice(0, maxStacks());

/**
 * Reads a numeric environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number|null} fallback - Value returned when the variable is unset,
 *   blank, or not a finite number (a blank value would otherwise coerce to 0
 *   and garbage to NaN, both of which make timeouts expire immediately).
 * @returns {number|null} The parsed number or `fallback`.
 */
function envNumber(name, fallback) {
  const raw = process.env[name];
  if (raw == null || raw.trim() === "") return fallback;
  const value = Number(raw);
  return Number.isFinite(value) ? value : fallback;
}

/**
 * Entry point: either writes the dry-run plan, or runs every stack against the
 * page and writes the benchmark JSON. Chrome is always stopped in `finally`,
 * and the temporary profile is removed unless profile reuse was requested.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const config = benchmarkConfig();
  if (dryRun) {
    const payload = {
      generatedAt: new Date().toISOString(),
      dryRun: true,
      url,
      config,
      candidates: stacks.map((stack) => ({ stack, status: "planned" })),
    };
    await mkdir(dirname(resultPath), { recursive: true });
    await writeJson(payload);
    console.log(`Wrote local candidate dry-run JSON: ${resultPath}`);
    console.log(`Planned ${stacks.length} stack(s): ${stacks.map(stackLabel).join("; ")}`);
    return;
  }

  await ensureServer();
  await mkdir(dirname(resultPath), { recursive: true });
  if (!reuseProfile) await rm(profileDir, { recursive: true, force: true });

  const browser = launchBrowser(devtoolsPort, profileDir);
  const candidates = stacks.map((stack) => ({ stack, status: "pending" }));
  try {
    const client = await connectToPage(devtoolsPort, browser);
    await waitForBenchApi(client);
    await runPageTask(client, "window.browserSpeakBench.clearResults()", { label: "clear results" });

    for (const candidate of candidates) {
      const label = stackLabel(candidate.stack);
      console.log(`Running local candidate: ${label}`);
      const pageUsable = await runCandidate(client, candidate, label);
      await cleanUpCandidate(client, candidate, label, pageUsable);
      // Persist after every candidate so partial progress survives a later crash.
      await writeJson(await exportPayload(client, candidates));
      // An unresponsive renderer cannot run further candidates.
      if (!pageUsable) break;
    }

    const payload = await exportPayload(client, candidates);
    await writeJson(payload);
    console.log(`Wrote local candidate benchmark JSON: ${resultPath}`);
    summarizeCandidates(payload.candidates);
    await client.closeBrowser();
  } finally {
    await stopBrowser(browser, reuseProfile ? null : profileDir);
  }
}

/**
 * Loads one stack, runs every configured task and records the outcome on
 * `candidate` (status, durationMs, rowCount, errorCount, metrics, summary, and
 * errors/error). Never throws for page-side failures; they mark the candidate
 * as "failed" instead.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @param {object} candidate - Mutable candidate record holding `stack`.
 * @param {string} label - Human-readable stack label used in logs and errors.
 * @returns {Promise<boolean>} False when the failure indicates an unresponsive
 *   renderer, true otherwise.
 */
async function runCandidate(client, candidate, label) {
  const startedAt = Date.now();
  try {
    const before = await runPageTask(client, "window.browserSpeakBench.state()", {
      label: `${label} preflight`,
    });
    await runPageTask(client, `window.browserSpeakBench.loadStack(${JSON.stringify(candidate.stack)})`, {
      label: `${label} model load`,
      timeoutMs: loadTimeoutMs,
    });
    for (const task of localTasks) {
      await runTask(client, task, label);
    }
    const snapshot = await runPageTask(client, "window.browserSpeakBench.state()", {
      label: `${label} state`,
    });

    // NOTE(review): taking the first `addedCount` entries assumes the page lists
    // the newest results first — confirm against the page implementation.
    const addedCount = Math.max(0, snapshot.results.length - before.results.length);
    const rows = snapshot.results.slice(0, addedCount);
    const errors = rows.filter((row) => row.error);

    candidate.status = errors.length === 0 ? "complete" : "partial";
    candidate.durationMs = Date.now() - startedAt;
    candidate.rowCount = rows.length;
    candidate.errorCount = errors.length;
    candidate.metrics = metricsForRows(rows);
    candidate.summary = snapshot.summary.current ?? snapshot.summary.all;
    if (errors.length > 0) candidate.errors = errors.map((row) => `${row.kind}: ${row.error}`);
    return true;
  } catch (error) {
    candidate.status = "failed";
    candidate.durationMs = Date.now() - startedAt;
    candidate.error = error.message;
    console.error(`${label} failed: ${error.message}`);
    return !isPageUnresponsiveError(error);
  }
}

/**
 * Best-effort page cleanup after a candidate: stops the bench and unloads the
 * models if the page reports them loaded or loading. Failures are recorded on
 * `candidate.unloadError` rather than thrown.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @param {object} candidate - Mutable candidate record.
 * @param {string} label - Human-readable stack label.
 * @param {boolean} pageUsable - When false, cleanup is skipped entirely.
 * @returns {Promise<void>}
 */
async function cleanUpCandidate(client, candidate, label, pageUsable) {
  if (!pageUsable) {
    candidate.unloadError = "Skipped page cleanup because the renderer was unresponsive.";
    return;
  }
  // Stop/state failures are deliberately ignored: cleanup must not mask the candidate result.
  await runPageTask(client, "window.browserSpeakBench.stop()", {
    label: `${label} stop`,
    timeoutMs: 20000,
  }).catch(() => {});
  const state = await runPageTask(client, "window.browserSpeakBench.state()", {
    label: `${label} loaded state`,
    timeoutMs: 20000,
  }).catch(() => null);
  if (state?.modelsLoaded || state?.modelsLoading) {
    await runPageTask(client, "window.browserSpeakBench.unload()", {
      label: `${label} unload`,
      timeoutMs: 60000,
    }).catch((error) => {
      candidate.unloadError = error.message;
    });
  }
}

/**
 * Runs one named benchmark task on the page.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @param {string} task - Task name (case-insensitive): tts, identity, chat,
 *   loopback, barge, barge-in or suite.
 * @param {string} label - Stack label used to prefix log and error messages.
 * @returns {Promise<void>}
 * @throws {Error} When the task name is unknown or the page task fails.
 */
async function runTask(client, task, label) {
  const normalized = task.toLowerCase();
  const loopbackSpeed = envNumber("BROWSER_SPEAK_LOOPBACK_SPEED", null);
  const loopbackOptions = {
    timeoutMs: 180000,
    ...(process.env.BROWSER_SPEAK_LOOPBACK_TEXT ? { text: process.env.BROWSER_SPEAK_LOOPBACK_TEXT } : {}),
    ...(loopbackSpeed != null ? { speed: loopbackSpeed } : {}),
  };
  const expressions = {
    tts: "window.browserSpeakBench.runTts({ timeoutMs: 90000 })",
    identity: "window.browserSpeakBench.runIdentity({ timeoutMs: 180000 })",
    chat: "window.browserSpeakBench.runChat({ timeoutMs: 180000 })",
    loopback: `window.browserSpeakBench.runLoopback(${JSON.stringify(loopbackOptions)})`,
    barge: "window.browserSpeakBench.runBargeIn({ timeoutMs: 90000 })",
    "barge-in": "window.browserSpeakBench.runBargeIn({ timeoutMs: 90000 })",
    suite: "window.browserSpeakBench.runSuite()",
  };
  // Own-property check so names like "constructor" are rejected instead of
  // resolving to Object.prototype members.
  if (!Object.hasOwn(expressions, normalized)) {
    throw new Error(`Unknown local benchmark task: ${task}`);
  }
  await runPageTask(client, expressions[normalized], {
    label: `${label} ${normalized}`,
    timeoutMs: normalized === "suite" ? Math.max(taskTimeoutMs, 420000) : taskTimeoutMs,
  });
}

/**
 * Builds the JSON payload written to disk: run metadata, candidate records and
 * the page's exported results. If the export call fails, the payload still
 * contains the candidates with `summary: null` and `results: []`.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @param {object[]} candidates - Candidate records collected so far.
 * @returns {Promise<object>} Payload ready for `writeJson`.
 */
async function exportPayload(client, candidates) {
  const exportResult = await runPageTask(client, "window.browserSpeakBench.exportResults()", {
    label: "export results",
    timeoutMs: 30000,
  }).catch(() => ({ summary: null, results: [] }));
  return {
    generatedAt: new Date().toISOString(),
    url,
    config: benchmarkConfig(),
    candidates,
    summary: exportResult.summary,
    results: exportResult.results,
  };
}

/**
 * Snapshot of the effective harness configuration, embedded in every payload.
 *
 * @returns {object} Plain, JSON-serializable configuration record.
 */
function benchmarkConfig() {
  return {
    tasks: localTasks,
    stackCount: stacks.length,
    loadTimeoutMs,
    taskTimeoutMs,
    protocolTimeoutMs,
    pollTimeoutMs,
    pageUnresponsiveTimeoutMs,
    ttsWarmup,
    reuseProfile,
    headless,
    chrome,
    profileDir,
    extraChromeArgs: parseChromeArgs(),
  };
}

/**
 * Tells whether an error message matches the ones this harness raises when the
 * page stops answering DevTools calls.
 *
 * @param {unknown} error - Error (or any thrown value).
 * @returns {boolean} True for "page stayed unresponsive" / "Runtime.evaluate timed out".
 */
function isPageUnresponsiveError(error) {
  return /page stayed unresponsive|Runtime\.evaluate timed out/i.test(error?.message ?? String(error));
}

/**
 * Writes `payload` to `resultPath` as pretty-printed JSON with a trailing newline.
 *
 * @param {object} payload - JSON-serializable value.
 * @returns {Promise<void>}
 */
async function writeJson(payload) {
  await writeFile(resultPath, `${JSON.stringify(payload, null, 2)}\n`);
}

/**
 * Produces the list of stacks to benchmark.
 *
 * When BROWSER_SPEAK_LOCAL_STACKS is set it must be a JSON array whose entries
 * are normalized individually. Otherwise the cartesian product of the
 * comma-separated LLM, ASR, voice, TTS-step and VAD-silence lists is used.
 *
 * @returns {object[]} Stack descriptors.
 * @throws {Error} When BROWSER_SPEAK_LOCAL_STACKS is not valid JSON or not an array.
 */
function buildStacks() {
  if (process.env.BROWSER_SPEAK_LOCAL_STACKS) {
    let parsed;
    try {
      parsed = JSON.parse(process.env.BROWSER_SPEAK_LOCAL_STACKS);
    } catch (error) {
      throw new Error(`BROWSER_SPEAK_LOCAL_STACKS is not valid JSON: ${error.message}`, { cause: error });
    }
    if (!Array.isArray(parsed)) throw new Error("BROWSER_SPEAK_LOCAL_STACKS must be a JSON array.");
    return parsed.map((stack, index) => normalizeLocalStack(stack, index));
  }

  const llms = parseList(process.env.BROWSER_SPEAK_LOCAL_LLMS ?? "HuggingFaceTB/SmolLM2-135M-Instruct");
  const asrs = parseList(
    process.env.BROWSER_SPEAK_LOCAL_ASRS ??
      "onnx-community/moonshine-base-ONNX,onnx-community/moonshine-tiny-ONNX,onnx-community/whisper-tiny.en",
  );
  const voices = parseList(process.env.BROWSER_SPEAK_LOCAL_VOICES ?? "F2");
  const steps = parseList(process.env.BROWSER_SPEAK_LOCAL_TTS_STEPS ?? "2").map((value) => Number(value));
  const vadSilenceValues = parseList(process.env.BROWSER_SPEAK_LOCAL_VAD_SILENCE_MS ?? "480").map((value) =>
    Number(value),
  );
  const partialAsr = process.env.BROWSER_SPEAK_PARTIAL_ASR !== "false";

  const combinations = [];
  for (const llm of llms) {
    for (const asr of asrs) {
      for (const voice of voices) {
        for (const ttsSteps of steps) {
          for (const vadSilenceMs of vadSilenceValues) {
            combinations.push({
              device: "wasm",
              llm,
              asr,
              voice,
              ttsSteps,
              vadSilenceMs,
              partialAsr,
              ttsWarmup,
            });
          }
        }
      }
    }
  }
  return combinations;
}

/**
 * Fills in defaults for one entry of BROWSER_SPEAK_LOCAL_STACKS. Missing fields
 * fall back to the first item of the corresponding environment list (or its
 * built-in default); `device` is always "wasm".
 *
 * @param {object} stack - Raw entry from the JSON array.
 * @param {number} index - Zero-based position, used for the default name.
 * @returns {object} Normalized stack descriptor.
 * @throws {Error} When the entry is not an object.
 */
function normalizeLocalStack(stack, index) {
  if (stack == null || typeof stack !== "object") {
    throw new Error(`BROWSER_SPEAK_LOCAL_STACKS entry ${index + 1} must be an object.`);
  }
  const firstOf = (name, fallback) => parseList(process.env[name] ?? fallback)[0];
  return {
    device: "wasm",
    name: stack.name ?? `stack-${index + 1}`,
    llm: stack.llm ?? firstOf("BROWSER_SPEAK_LOCAL_LLMS", "HuggingFaceTB/SmolLM2-135M-Instruct"),
    asr: stack.asr ?? firstOf("BROWSER_SPEAK_LOCAL_ASRS", "onnx-community/moonshine-base-ONNX"),
    voice: stack.voice ?? firstOf("BROWSER_SPEAK_LOCAL_VOICES", "F2"),
    ttsSteps: Number(stack.ttsSteps ?? firstOf("BROWSER_SPEAK_LOCAL_TTS_STEPS", "2")),
    vadSilenceMs: Number(stack.vadSilenceMs ?? firstOf("BROWSER_SPEAK_LOCAL_VAD_SILENCE_MS", "480")),
    partialAsr: stack.partialAsr ?? (process.env.BROWSER_SPEAK_PARTIAL_ASR !== "false"),
    ttsWarmup: stack.ttsWarmup ?? ttsWarmup,
    ...(stack.ttsChunking ? { ttsChunking: stack.ttsChunking } : {}),
  };
}

/**
 * Upper bound on the number of stacks, from BROWSER_SPEAK_LOCAL_MAX_STACKS.
 *
 * @returns {number} A non-negative limit, or +Infinity when unset or not finite.
 */
function maxStacks() {
  if (!process.env.BROWSER_SPEAK_LOCAL_MAX_STACKS) return Number.POSITIVE_INFINITY;
  const value = Number(process.env.BROWSER_SPEAK_LOCAL_MAX_STACKS);
  return Number.isFinite(value) ? Math.max(0, value) : Number.POSITIVE_INFINITY;
}

/**
 * Verifies that the benchmark page is being served at `url`.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the request fails or the response status is not OK.
 */
async function ensureServer() {
  let response;
  try {
    response = await fetch(url);
  } catch (error) {
    throw new Error(`Could not reach ${url}: ${error.message}`, { cause: error });
  }
  if (!response.ok) throw new Error(`${url} returned HTTP ${response.status}`);
}

/**
 * Polls the page every 100 ms, for up to 15 seconds, until
 * `window.browserSpeakBench` exists.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @returns {Promise<void>}
 * @throws {Error} When the API is still missing at the deadline.
 */
async function waitForBenchApi(client) {
  const deadline = Date.now() + 15000;
  while (Date.now() < deadline) {
    try {
      if (await client.evaluate("Boolean(window.browserSpeakBench)")) return;
    } catch {
      // The target may still be navigating and can destroy the execution context.
    }
    await sleep(100);
  }
  throw new Error("window.browserSpeakBench was not installed.");
}

/**
 * Starts `expression` on the page and polls for its settled result.
 *
 * The expression is wrapped so that its outcome is stored under a unique id in
 * `window.__browserSpeakHarnessTasks`; each poll is then a short evaluate call
 * with its own timeout. While waiting, the newest bench event is logged whenever
 * the first three events change.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @param {string} expression - JavaScript expression evaluated in the page; may yield a promise.
 * @param {object} [options]
 * @param {string} [options.label="page task"] - Prefix for logs and error messages.
 * @param {number} [options.timeoutMs=30000] - Overall deadline for the task.
 * @returns {Promise<unknown>} The value the expression resolved to.
 * @throws {Error} When the task rejects, the deadline passes, or polling fails
 *   continuously for longer than `pageUnresponsiveTimeoutMs`.
 */
async function runPageTask(client, expression, { label = "page task", timeoutMs = 30000 } = {}) {
  const taskId = `task_${Date.now()}_${Math.random().toString(16).slice(2)}`;
  const taskKey = JSON.stringify(taskId);
  await client.evaluate(`(() => {
    const taskId = ${taskKey};
    window.__browserSpeakHarnessTasks ||= {};
    window.__browserSpeakHarnessTasks[taskId] = { done: false, label: ${JSON.stringify(label)} };
    Promise.resolve(${expression})
      .then((value) => {
        window.__browserSpeakHarnessTasks[taskId] = { done: true, value };
      })
      .catch((error) => {
        window.__browserSpeakHarnessTasks[taskId] = {
          done: true,
          error: error?.stack || error?.message || String(error),
        };
      });
    return true;
  })()`);

  const deadline = Date.now() + timeoutMs;
  let lastEvents = [];
  let lastPollError = "";
  let pollErrorSince = 0;

  while (Date.now() < deadline) {
    let task = null;
    try {
      task = await client.evaluate(`window.__browserSpeakHarnessTasks?.[${taskKey}] ?? null`, pollTimeoutMs);
      lastPollError = "";
      pollErrorSince = 0;
    } catch (error) {
      const message = error.message ?? String(error);
      // Remember when the current streak of poll failures began.
      pollErrorSince ||= Date.now();
      if (message !== lastPollError) {
        console.log(`${label}: waiting for page response (${message})`);
        lastPollError = message;
      }
      if (Date.now() - pollErrorSince > pageUnresponsiveTimeoutMs) {
        throw new Error(
          `${label} page stayed unresponsive for ${(pageUnresponsiveTimeoutMs / 1000).toFixed(0)} seconds; last poll error: ${message}`,
        );
      }
      await sleep(500);
      continue;
    }

    if (task?.done) {
      // Best-effort removal of the bookkeeping entry; the result is already in hand.
      await client
        .evaluate(`delete window.__browserSpeakHarnessTasks?.[${taskKey}]`, pollTimeoutMs)
        .catch(() => {});
      if (task.error) throw new Error(`${label} failed: ${task.error}`);
      return task.value;
    }

    const snapshot = await client
      .evaluate(
        `(() => {
          const state = window.browserSpeakBench?.state?.();
          return state
            ? {
                modelsLoaded: state.modelsLoaded,
                modelsLoading: state.modelsLoading,
                activeBenchmark: state.activeBenchmark?.kind ?? null,
                suiteRunning: state.suiteRunning,
                events: state.events?.slice(0, 3) ?? [],
              }
            : null;
        })()`,
        pollTimeoutMs,
      )
      .catch(() => null);
    const events = snapshot?.events ?? [];
    if (events.join("\n") !== lastEvents.join("\n")) {
      lastEvents = events;
      if (events[0]) console.log(`${label}: ${events[0]}`);
    }
    await sleep(500);
  }
  throw new Error(`${label} timed out after ${(timeoutMs / 1000).toFixed(0)} seconds.`);
}

/**
 * Spawns Chrome with remote debugging enabled.
 *
 * The last 8000 characters of stdout/stderr are kept on `child.browserLog`. A
 * spawn failure (for example a missing binary) is stored on `child.spawnError`
 * instead of surfacing as an unhandled 'error' event.
 *
 * @param {number} port - Remote debugging port.
 * @param {string} profileDir - Directory passed as --user-data-dir.
 * @returns {import("node:child_process").ChildProcess} The child process.
 */
function launchBrowser(port, profileDir) {
  const child = spawn(
    chrome,
    [
      ...(headless ? ["--headless=new"] : []),
      "--no-sandbox",
      "--disable-gpu",
      "--disable-dev-shm-usage",
      "--disable-background-networking",
      "--disable-extensions",
      "--no-default-browser-check",
      "--no-first-run",
      "--autoplay-policy=no-user-gesture-required",
      `--remote-debugging-port=${port}`,
      `--user-data-dir=${profileDir}`,
      ...parseChromeArgs(),
    ],
    { stdio: ["ignore", "pipe", "pipe"] },
  );
  child.browserLog = "";
  child.spawnError = null;
  const appendLog = (chunk) => {
    child.browserLog = `${child.browserLog}${chunk}`;
    if (child.browserLog.length > 8000) child.browserLog = child.browserLog.slice(-8000);
  };
  child.stdout.on("data", appendLog);
  child.stderr.on("data", appendLog);
  child.on("error", (error) => {
    // Keep the first failure; connectToPage reports it.
    child.spawnError ??= error;
    appendLog(`${error.message}\n`);
  });
  return child;
}

/**
 * Terminates Chrome and optionally deletes its profile directory.
 *
 * Sends SIGTERM, waits up to 3 seconds for the 'exit' event, then sends SIGKILL
 * if the process is still alive. Profile removal is retried up to 5 times,
 * 500 ms apart; the final failure is logged as a warning, not thrown.
 *
 * @param {import("node:child_process").ChildProcess} child - Process from `launchBrowser`.
 * @param {string|null} profileDir - Directory to delete, or null to keep it.
 * @returns {Promise<void>}
 */
async function stopBrowser(child, profileDir) {
  const hasExited = () => child.exitCode != null || child.signalCode != null;
  // A child without a pid never started, so there is nothing to signal or wait for.
  if (child.pid != null && !hasExited()) {
    child.kill("SIGTERM");
    await new Promise((resolveWait) => {
      const timer = setTimeout(resolveWait, 3000);
      child.once("exit", () => {
        clearTimeout(timer);
        resolveWait();
      });
    });
    if (!hasExited()) child.kill("SIGKILL");
  }
  if (!profileDir) return;

  const maxAttempts = 5;
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      await rm(profileDir, { recursive: true, force: true });
      return;
    } catch (error) {
      if (attempt === maxAttempts) {
        console.warn(`Could not remove ${profileDir}: ${error.message}`);
        return;
      }
      await sleep(500);
    }
  }
}

/**
 * Waits up to 60 seconds for Chrome's DevTools HTTP endpoint, then opens a page
 * target for `url` and returns a client connected to it.
 *
 * @param {number} port - Remote debugging port.
 * @param {import("node:child_process").ChildProcess} child - Chrome process, watched for early exit.
 * @returns {Promise<CdpClient>} Client bound to the page target.
 * @throws {Error} When Chrome fails to start, exits early, or DevTools never becomes reachable.
 */
async function connectToPage(port, child) {
  const deadline = Date.now() + 60000;
  let lastError = null;
  while (Date.now() < deadline) {
    if (child.spawnError) {
      throw new Error(`Could not start Chrome (${chrome}): ${child.spawnError.message}`, {
        cause: child.spawnError,
      });
    }
    // exitCode stays null when the process was terminated by a signal, so check both.
    if (child.exitCode != null || child.signalCode != null) {
      throw new Error(`Chrome exited before DevTools became available.\n${child.browserLog}`);
    }
    try {
      const version = await fetch(`http://127.0.0.1:${port}/json/version`).then((response) => response.json());
      if (version.webSocketDebuggerUrl) {
        const page = await createPageTarget(port);
        return new CdpClient(page.webSocketDebuggerUrl);
      }
    } catch (error) {
      lastError = error;
    }
    await sleep(250);
  }
  throw new Error(
    `Could not connect to Chrome DevTools on port ${port}: ${lastError?.message ?? "unknown error"}\n${child.browserLog}`,
  );
}

/**
 * Opens a new page target for `url` through the DevTools HTTP API, trying PUT
 * and then GET on /json/new. If neither succeeds, falls back to an existing
 * page target whose URL equals `url`.
 *
 * @param {number} port - Remote debugging port.
 * @returns {Promise<object>} Target descriptor containing `webSocketDebuggerUrl`.
 * @throws {Error} When no target could be created or found.
 */
async function createPageTarget(port) {
  for (const method of ["PUT", "GET"]) {
    const response = await fetch(`http://127.0.0.1:${port}/json/new?${encodeURIComponent(url)}`, {
      method,
    }).catch(() => null);
    if (response?.ok) {
      const target = await response.json();
      if (target.webSocketDebuggerUrl) return target;
    }
  }
  const targets = await fetch(`http://127.0.0.1:${port}/json`).then((response) => response.json());
  const page = targets.find((target) => target.type === "page" && target.url === url);
  if (page?.webSocketDebuggerUrl) return page;
  throw new Error("Could not create or find a page target.");
}

/**
 * Minimal DevTools protocol client over a WebSocket: sends numbered requests
 * and settles the matching promise when the response with the same id arrives.
 */
class CdpClient {
  /**
   * @param {string} webSocketUrl - `webSocketDebuggerUrl` of the target.
   */
  constructor(webSocketUrl) {
    this.nextId = 1;
    this.pending = new Map();
    this.socket = new WebSocket(webSocketUrl);
    this.opened = new Promise((resolveOpen, rejectOpen) => {
      this.socket.onopen = resolveOpen;
      // Reject with a real Error so callers can rely on `error.message`.
      this.socket.onerror = (event) =>
        rejectOpen(new Error(event?.message || "WebSocket connection to DevTools failed."));
      this.socket.onmessage = (event) => this.onMessage(event);
    });
  }

  /**
   * Routes a protocol message to its pending call; messages without a known id
   * (such as events) are ignored.
   *
   * @param {{ data: string }} event - WebSocket message event.
   */
  onMessage(event) {
    const message = JSON.parse(event.data);
    if (!message.id || !this.pending.has(message.id)) return;
    const { resolve: onResolve, reject } = this.pending.get(message.id);
    this.pending.delete(message.id);
    if (message.error) reject(new Error(message.error.message));
    else onResolve(message.result);
  }

  /**
   * Sends one protocol command and waits for its response.
   *
   * @param {string} method - Protocol method, e.g. "Runtime.evaluate".
   * @param {object} [params={}] - Method parameters.
   * @param {number} [timeoutMs=protocolTimeoutMs] - Time allowed for the response.
   * @returns {Promise<object>} The `result` field of the response.
   * @throws {Error} On a protocol error or when no response arrives in time.
   */
  async call(method, params = {}, timeoutMs = protocolTimeoutMs) {
    await this.opened;
    const id = this.nextId++;
    this.socket.send(JSON.stringify({ id, method, params }));
    return new Promise((resolvePromise, reject) => {
      const timer = setTimeout(() => {
        this.pending.delete(id);
        reject(new Error(`${method} timed out after ${(timeoutMs / 1000).toFixed(0)} seconds.`));
      }, timeoutMs);
      this.pending.set(id, {
        resolve: (value) => {
          clearTimeout(timer);
          resolvePromise(value);
        },
        reject: (error) => {
          clearTimeout(timer);
          reject(error);
        },
      });
    });
  }

  /**
   * Evaluates an expression in the page and returns its value by value.
   *
   * @param {string} expression - JavaScript source.
   * @param {number} [timeoutMs=protocolTimeoutMs] - Time allowed for the call.
   * @returns {Promise<unknown>} The evaluated value.
   * @throws {Error} When the evaluation raised an exception in the page.
   */
  async evaluate(expression, timeoutMs = protocolTimeoutMs) {
    const result = await this.call(
      "Runtime.evaluate",
      {
        expression,
        returnByValue: true,
      },
      timeoutMs,
    );
    if (result.exceptionDetails) throw new Error(formatException(result.exceptionDetails));
    return result.result.value;
  }

  /**
   * Asks Chrome to close and then closes the socket, without waiting for
   * either to complete. Errors are ignored.
   */
  closeBrowser() {
    try {
      this.socket.send(JSON.stringify({ id: this.nextId++, method: "Browser.close", params: {} }));
    } catch {
      // The surrounding process cleanup handles already-closed targets.
    }
    try {
      this.socket.close();
    } catch {
      // Ignore close races.
    }
  }
}

/**
 * Picks the most descriptive text available from `exceptionDetails`.
 *
 * @param {object} exceptionDetails - `exceptionDetails` from a Runtime.evaluate result.
 * @returns {unknown} Exception description, thrown value, protocol text, or "Evaluation failed.".
 */
function formatException(exceptionDetails) {
  const exception = exceptionDetails.exception;
  return exception?.description ?? exception?.value ?? exceptionDetails.text ?? "Evaluation failed.";
}

/**
 * Splits a comma-separated string into trimmed, non-empty items.
 *
 * @param {unknown} value - Raw value; null/undefined yields an empty list.
 * @returns {string[]} Items in their original order.
 */
function parseList(value) {
  return String(value ?? "")
    .split(",")
    .map((item) => item.trim())
    .filter(Boolean);
}

/**
 * Tokenizes BROWSER_SPEAK_CHROME_ARGS on whitespace, keeping single- or
 * double-quoted runs together, and removes the quote characters of every
 * balanced pair — including quotes inside a token, so `--flag="a b"` becomes
 * `--flag=a b`.
 *
 * @returns {string[]} Extra Chrome arguments; empty when the variable is unset.
 */
function parseChromeArgs() {
  const raw = process.env.BROWSER_SPEAK_CHROME_ARGS ?? "";
  const tokens = raw.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g) ?? [];
  return tokens.map((token) =>
    token.replace(/"([^"]*)"|'([^']*)'/g, (_match, doubleQuoted, singleQuoted) => doubleQuoted ?? singleQuoted),
  );
}

/**
 * Builds a short display label for a stack, e.g. "model / asr / F2 / 2 steps / 480ms".
 *
 * @param {object} stack - Stack descriptor.
 * @returns {string} Non-empty parts joined by " / ".
 */
function stackLabel(stack) {
  return [
    shortModel(stack.llm),
    shortModel(stack.asr),
    stack.voice,
    `${stack.ttsSteps} steps`,
    `${stack.vadSilenceMs}ms`,
    stack.ttsChunking
      ? `chunk ${stack.ttsChunking.firstTargetChars ?? "?"}/${stack.ttsChunking.targetChars ?? "?"}`
      : "",
  ]
    .filter(Boolean)
    .join(" / ");
}

/**
 * Reduces a model id to the segment after the last "/" with a trailing "-ONNX" removed.
 *
 * @param {string} [modelId=""] - Full model id.
 * @returns {string} Shortened name.
 */
function shortModel(modelId = "") {
  return modelId.split("/").at(-1)?.replace(/-ONNX$/, "") ?? modelId;
}

/**
 * Prints one summary line per candidate: status, row count, selected metrics
 * and the failure message, if any.
 *
 * @param {object[]} candidates - Candidate records.
 */
function summarizeCandidates(candidates) {
  for (const candidate of candidates) {
    const metrics = candidate.metrics ?? {};
    const parts = [];
    if (Number.isFinite(metrics.ttsMedianFirstAudioMs)) {
      parts.push(`tts first audio ${formatMs(metrics.ttsMedianFirstAudioMs)}`);
    }
    if (Number.isFinite(metrics.modelLoadMs)) {
      parts.push(`load ${formatMs(metrics.modelLoadMs)}`);
    }
    if (Number.isFinite(metrics.loopbackMedianSpeechEndToFirstAudioMs)) {
      parts.push(`loopback end-to-audio ${formatMs(metrics.loopbackMedianSpeechEndToFirstAudioMs)}`);
    }
    if (Number.isFinite(metrics.identityMedianFirstAudioMs)) {
      parts.push(`identity first audio ${formatMs(metrics.identityMedianFirstAudioMs)}`);
    }
    const suffix = parts.length > 0 ? `, ${parts.join(", ")}` : "";
    const error = candidate.error ? ` (${candidate.error})` : "";
    console.log(`${stackLabel(candidate.stack)}: ${candidate.status}, ${candidate.rowCount ?? 0} rows${suffix}${error}`);
  }
}

/**
 * Aggregates result rows into per-kind run counts and medians. Rows carrying an
 * `error` are excluded from everything except the `modelLoadMs` lookup.
 *
 * @param {object[]} rows - Result rows produced for one candidate.
 * @returns {object} Metrics record; medians are null when no finite values exist.
 */
function metricsForRows(rows) {
  const completed = rows.filter((row) => !row.error);
  const byKind = (kind) => completed.filter((row) => row.kind === kind);
  const ttsRows = byKind("tts");
  const identityRows = byKind("identity");
  const chatRows = byKind("chat");
  const loopbackRows = byKind("loopback");
  return {
    modelLoadMs: rows.find((row) => Number.isFinite(row.stack?.modelLoadMs))?.stack.modelLoadMs ?? null,
    ttsRuns: ttsRows.length,
    ttsMedianFirstAudioMs: median(ttsRows.map((row) => row.firstAudioMs)),
    ttsMedianSynthesisMs: median(ttsRows.map((row) => row.firstTtsSynthesisMs)),
    ttsMedianAudioEndMs: median(ttsRows.map((row) => row.audioEndMs)),
    identityRuns: identityRows.length,
    identityMedianFirstTokenMs: median(identityRows.map((row) => row.firstTokenMs)),
    identityMedianFirstAudioMs: median(identityRows.map((row) => row.firstAudioMs)),
    identityPasses: identityRows.filter((row) => row.llmQualityPass).length,
    chatRuns: chatRows.length,
    chatMedianFirstTokenMs: median(chatRows.map((row) => row.firstTokenMs)),
    chatMedianFirstAudioMs: median(chatRows.map((row) => row.firstAudioMs)),
    loopbackRuns: loopbackRows.length,
    loopbackMedianWer: median(loopbackRows.map((row) => row.sttWer)),
    loopbackMedianSpeechEndToFirstAudioMs: median(loopbackRows.map((row) => row.speechEndToFirstAudioMs)),
    loopbackMedianSpeechEndToAudioEndMs: median(loopbackRows.map((row) => row.speechEndToAudioEndMs)),
  };
}

/**
 * Median of the finite numbers in `values`; other entries are ignored.
 *
 * @param {unknown[]} values - Candidate values.
 * @returns {number|null} The median, or null when no finite number is present.
 */
function median(values) {
  const finite = values.filter(Number.isFinite).sort((a, b) => a - b);
  if (finite.length === 0) return null;
  const middle = Math.floor(finite.length / 2);
  if (finite.length % 2 === 1) return finite[middle];
  return (finite[middle - 1] + finite[middle]) / 2;
}

/**
 * Formats a duration: whole milliseconds below one second, otherwise seconds
 * with two decimals; "-" for non-finite input.
 *
 * @param {number} value - Duration in milliseconds.
 * @returns {string} Display string.
 */
function formatMs(value) {
  if (!Number.isFinite(value)) return "-";
  if (value < 1000) return `${Math.round(value)} ms`;
  return `${(value / 1000).toFixed(2)} s`;
}

/**
 * Resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => setTimeout(wake, ms));
}

await main();