#!/usr/bin/env node
/**
 * Real-microphone validation harness.
 *
 * Launches Chrome with remote debugging enabled, opens BROWSER_SPEAK_URL in a
 * new page target, drives `window.browserSpeakBench` over the DevTools
 * protocol (load models, run `count` microphone rows, export results) and
 * writes a JSON report to `resultPath`. With BROWSER_SPEAK_REAL_MIC_DRY_RUN=true
 * only the planned configuration and preflight result are written.
 *
 * The process exit code is set to 1 when the run fails or throws.
 */
import { spawn } from "node:child_process";
import { existsSync } from "node:fs";
import { mkdir, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { dirname, resolve } from "node:path";
import { sourceFingerprint } from "./source-fingerprint.mjs";

// Remote-debugging port used both to launch Chrome and to connect to it.
const devtoolsPort = 9347;

const url = process.env.BROWSER_SPEAK_URL ?? "http://127.0.0.1:5174/";
const chrome =
  process.env.CHROME_BIN ??
  (existsSync("/opt/google/chrome/chrome")
    ? "/opt/google/chrome/chrome"
    : existsSync("/usr/bin/google-chrome")
      ? "/usr/bin/google-chrome"
      : "chromium");
const dryRun = process.env.BROWSER_SPEAK_REAL_MIC_DRY_RUN === "true";
const resultPath = resolve(
  process.env.BROWSER_SPEAK_REAL_MIC_JSON ??
    `${tmpdir()}/browser-speak-real-mic-series${dryRun ? "-dry-run" : ""}.json`,
);
const profileDir = resolve(
  process.env.BROWSER_SPEAK_REAL_MIC_PROFILE_DIR ?? `${tmpdir()}/browser-speak-real-mic-profile`,
);
const protocolTimeoutMs = numberFromEnv("BROWSER_SPEAK_CDP_TIMEOUT_MS", 60000);
const pollTimeoutMs = numberFromEnv("BROWSER_SPEAK_CDP_POLL_TIMEOUT_MS", 5000);
const loadTimeoutMs = numberFromEnv("BROWSER_SPEAK_LOAD_TIMEOUT_MS", 900000);
const rowTimeoutMs = numberFromEnv("BROWSER_SPEAK_REAL_MIC_ROW_TIMEOUT_MS", 150000);
// Floored so a fractional value cannot make `rows.length >= count` unsatisfiable.
const count = Math.max(1, Math.floor(numberFromEnv("BROWSER_SPEAK_REAL_MIC_COUNT", 3)));
const headless = process.env.BROWSER_SPEAK_HEADLESS === "true";
const reuseProfile = process.env.BROWSER_SPEAK_REAL_MIC_REUSE_PROFILE === "true";
const autoAcceptMic = process.env.BROWSER_SPEAK_REAL_MIC_AUTO_ACCEPT !== "false";
const allowFakeCapture = process.env.BROWSER_SPEAK_REAL_MIC_ALLOW_FAKE_CAPTURE === "true";
const keepBrowserOpen = process.env.BROWSER_SPEAK_KEEP_BROWSER_OPEN === "true";
const requireExactTranscript = process.env.BROWSER_SPEAK_REAL_MIC_REQUIRE_EXACT === "true";
const ttsWarmup = process.env.BROWSER_SPEAK_TTS_WARMUP !== "false";

// Options passed verbatim (as JSON) to window.browserSpeakBench.loadStack().
const stack = {
  device: process.env.BROWSER_SPEAK_DEVICE ?? "wasm",
  llm: process.env.BROWSER_SPEAK_LLM ?? "HuggingFaceTB/SmolLM2-135M-Instruct",
  asr: process.env.BROWSER_SPEAK_ASR ?? "onnx-community/moonshine-base-ONNX",
  voice: process.env.BROWSER_SPEAK_VOICE ?? "F2",
  ttsSteps: numberFromEnv("BROWSER_SPEAK_TTS_STEPS", 2),
  vadSilenceMs: numberFromEnv("BROWSER_SPEAK_VAD_SILENCE_MS", 480),
  partialAsr: process.env.BROWSER_SPEAK_PARTIAL_ASR !== "false",
  ttsWarmup,
};

/**
 * Reads a numeric environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset, blank, or not a finite number.
 * @returns {number} The parsed value or `fallback`.
 */
function numberFromEnv(name, fallback) {
  const raw = process.env[name];
  if (raw == null || raw.trim() === "") return fallback;
  const value = Number(raw);
  if (Number.isFinite(value)) return value;
  // A NaN timeout or row count would silently break the whole run, so say so and recover.
  console.warn(`Ignoring ${name}=${JSON.stringify(raw)}: not a finite number, using ${fallback}.`);
  return fallback;
}

/**
 * Entry point: writes the dry-run plan, or runs the full microphone series and
 * writes the benchmark (or failure) JSON.
 *
 * @returns {Promise<void>}
 * @throws {Error} When preflight fails, the server is unreachable, or any page task fails.
 */
async function main() {
  if (dryRun) {
    await writeDryRunPayload();
    return;
  }

  const preflight = realMicPreflight();
  if (!preflight.ok) throw new Error(preflight.warnings.join(" "));

  await ensureServer();
  await mkdir(dirname(resultPath), { recursive: true });
  if (!reuseProfile) await rm(profileDir, { recursive: true, force: true });

  const browser = launchBrowser(devtoolsPort, profileDir);
  let client = null;
  try {
    client = await connectToPage(devtoolsPort, browser);
    await waitForBenchApi(client);
    await runPageTask(client, `window.browserSpeakBench.loadStack(${JSON.stringify(stack)})`, {
      label: "real-mic model load",
      timeoutMs: loadTimeoutMs,
    });
    await runPageTask(client, "window.browserSpeakBench.clearResults()", { label: "clear results" });

    console.log(`Real microphone validation: say "What app is this?" for each of ${count} row(s).`);
    console.log("Keep the browser window focused and allow microphone access if Chrome asks.");
    for (let i = 1; i <= count; i += 1) {
      console.log(`Real microphone row ${i}/${count}: say "What app is this?"`);
      await runPageTask(
        client,
        `window.browserSpeakBench.runMic(${JSON.stringify({
          // The in-page timeout is kept shorter than the harness timeout for the same row.
          timeoutMs: Math.max(1000, rowTimeoutMs - 5000),
          requireExactTranscript,
        })})`,
        {
          label: `real microphone row ${i}`,
          timeoutMs: rowTimeoutMs,
        },
      );
    }
    await runPageTask(client, "window.browserSpeakBench.stopMic()", {
      label: "stop microphone",
      timeoutMs: 30000,
    });

    const exportPayload = await runPageTask(client, "window.browserSpeakBench.exportResults()", {
      label: "export results",
      timeoutMs: 30000,
    });
    const rows = exportPayload.results.filter((row) => row.kind === "mic").slice(0, count);
    const errors = rows.filter((row) => row.error);
    const exactRows = rows.filter((row) => row.sttWer === 0).length;
    const payload = {
      generatedAt: new Date().toISOString(),
      sourceFingerprint: await sourceFingerprint(),
      url,
      passed:
        rows.length >= count && errors.length === 0 && (!requireExactTranscript || exactRows >= count),
      config: runConfig(),
      hostMetadata: exportPayload.hostMetadata ?? null,
      runtime: exportPayload.runtime ?? null,
      evidence: exportPayload.evidence ?? null,
      summary: summarizeRows(rows),
      exportSummary: exportPayload.summary,
      browserExport: exportPayload,
      results: exportPayload.results,
    };
    await writeFile(resultPath, `${JSON.stringify(payload, null, 2)}\n`);
    console.log(`Wrote real microphone benchmark JSON: ${resultPath}`);
    console.log(
      [
        `${rows.length}/${count} rows`,
        `${exactRows}/${rows.length} exact transcripts`,
        `median WER ${formatPercent(payload.summary.medianWer)}`,
        `median speech-end-to-audio ${formatMs(payload.summary.medianSpeechEndToFirstAudioMs)}`,
      ].join(", "),
    );
    if (!payload.passed) process.exitCode = 1;
    if (!keepBrowserOpen) await client.closeBrowser();
  } catch (error) {
    // Best effort: a failure while recording the failure must not mask the original error.
    await writeFailurePayload(client, browser, error).catch(() => {});
    if (client && !keepBrowserOpen) await client.closeBrowser().catch(() => {});
    throw error;
  } finally {
    if (!keepBrowserOpen) await stopBrowser(browser, reuseProfile ? null : profileDir);
  }
}

/**
 * Writes the dry-run JSON (configuration, preflight result, planned rows) and
 * logs any preflight warnings. Does not launch a browser.
 *
 * @returns {Promise<void>}
 */
async function writeDryRunPayload() {
  await mkdir(dirname(resultPath), { recursive: true });
  const payload = {
    generatedAt: new Date().toISOString(),
    sourceFingerprint: await sourceFingerprint(),
    dryRun: true,
    url,
    config: runConfig(),
    preflight: realMicPreflight(),
    plannedRows: count,
    prompt: "What app is this?",
  };
  await writeFile(resultPath, `${JSON.stringify(payload, null, 2)}\n`);
  console.log(`Wrote real microphone dry-run JSON: ${resultPath}`);
  console.log(`Planned ${count} real microphone row(s).`);
  for (const warning of payload.preflight.warnings) console.log(`Preflight warning: ${warning}`);
}

/**
 * Writes a failure report to `resultPath`, including the captured browser log
 * and, when a client is connected, a snapshot of the page's bench state.
 *
 * @param {CdpClient | null} client - Connected client, or null if connection never succeeded.
 * @param {import("node:child_process").ChildProcess} browser - Child returned by launchBrowser().
 * @param {unknown} error - The error that aborted the run.
 * @returns {Promise<void>}
 */
async function writeFailurePayload(client, browser, error) {
  const snapshot = client
    ? await client.evaluate("window.browserSpeakBench?.state?.() ?? null", pollTimeoutMs).catch(() => null)
    : null;
  const payload = {
    generatedAt: new Date().toISOString(),
    sourceFingerprint: await sourceFingerprint(),
    passed: false,
    // Optional chaining keeps this from throwing when a nullish value was rejected.
    error: error?.stack ?? error?.message ?? String(error),
    browserLog: browser?.browserLog ?? "",
    config: runConfig(),
    snapshot,
  };
  await writeFile(resultPath, `${JSON.stringify(payload, null, 2)}\n`);
  console.log(`Wrote real microphone failure JSON: ${resultPath}`);
}

/**
 * Collects the effective run configuration for inclusion in every report.
 *
 * @returns {object} Plain, JSON-serializable configuration snapshot.
 */
function runConfig() {
  return {
    stack,
    count,
    rowTimeoutMs,
    requireExactTranscript,
    headless,
    dryRun,
    reuseProfile,
    autoAcceptMic,
    allowFakeCapture,
    keepBrowserOpen,
    chrome,
    profileDir,
    extraChromeArgs: parseChromeArgs(),
  };
}

/**
 * Checks environment preconditions for a real-microphone run.
 *
 * @returns {{ok: boolean, warnings: string[], fakeCaptureArgs: string[], platform: string,
 *   display: string, waylandDisplay: string}} `ok` is true only when there are no warnings.
 */
function realMicPreflight() {
  const warnings = [];
  const fakeCaptureArgs = parseChromeArgs().filter(isFakeCaptureArg);

  const hasDisplay = Boolean(process.env.DISPLAY) || Boolean(process.env.WAYLAND_DISPLAY);
  if (!headless && process.platform === "linux" && !hasDisplay) {
    warnings.push(
      "Visible Chrome needs DISPLAY or WAYLAND_DISPLAY on Linux. Run from a desktop session, set BROWSER_SPEAK_HEADLESS=true, or provide a display.",
    );
  }
  // A bare command name (no "/") is looked up on PATH by spawn, so only explicit paths are checked.
  if (!existsSync(chrome) && chrome.includes("/")) {
    warnings.push(`Chrome binary was not found: ${chrome}`);
  }
  if (!allowFakeCapture && fakeCaptureArgs.length > 0) {
    warnings.push(
      `Real microphone validation cannot use fake-capture Chrome args: ${fakeCaptureArgs.join(
        ", ",
      )}. Set BROWSER_SPEAK_REAL_MIC_ALLOW_FAKE_CAPTURE=true only for harness debugging.`,
    );
  }

  return {
    ok: warnings.length === 0,
    warnings,
    fakeCaptureArgs,
    platform: process.platform,
    display: process.env.DISPLAY ?? "",
    waylandDisplay: process.env.WAYLAND_DISPLAY ?? "",
  };
}

/**
 * Aggregates exported microphone rows into counts and medians.
 *
 * @param {object[]} rows - Exported result rows.
 * @returns {object} Summary; each median is null when no row has a finite value for that field.
 */
function summarizeRows(rows) {
  const failed = rows.filter((row) => row.error);
  const medianOf = (field) => median(rows.map((row) => row[field]));
  return {
    rows: rows.length,
    completedRows: rows.length - failed.length,
    errorRows: failed.length,
    exactTranscriptRows: rows.filter((row) => row.sttWer === 0).length,
    medianWer: medianOf("sttWer"),
    medianCer: medianOf("sttCer"),
    medianAsrMs: medianOf("asrMs"),
    medianFirstTokenMs: medianOf("firstTokenMs"),
    medianFirstAudioMs: medianOf("firstAudioMs"),
    medianSpeechEndToFirstAudioMs: medianOf("speechEndToFirstAudioMs"),
    medianSpeechEndToAudioEndMs: medianOf("speechEndToAudioEndMs"),
    identityPasses: rows.filter((row) => row.llmQualityPass).length,
    transcripts: rows.map((row) => row.transcript ?? ""),
    errors: failed.map((row) => row.error),
  };
}

/**
 * Verifies that `url` answers with a successful HTTP status.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the request fails or the response is not ok.
 */
async function ensureServer() {
  const response = await fetch(url).catch((error) => {
    throw new Error(`Could not reach ${url}: ${error.message}`);
  });
  if (!response.ok) throw new Error(`${url} returned HTTP ${response.status}`);
}

/**
 * Polls the page until `window.browserSpeakBench` exists.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @returns {Promise<void>}
 * @throws {Error} When the API is still missing after 15 seconds.
 */
async function waitForBenchApi(client) {
  const deadline = Date.now() + 15000;
  while (Date.now() < deadline) {
    try {
      // Use the short poll timeout so one stuck evaluation cannot outlive the deadline.
      if (await client.evaluate("Boolean(window.browserSpeakBench)", pollTimeoutMs)) return;
    } catch {
      // The target may still be navigating.
    }
    await sleep(100);
  }
  throw new Error("window.browserSpeakBench was not installed.");
}

/**
 * Starts `expression` in the page without awaiting it over the protocol, then
 * polls a page-side task record until it settles. While waiting, the newest
 * bench event is logged whenever the first three events change.
 *
 * @param {CdpClient} client - Connected DevTools client.
 * @param {string} expression - JavaScript source evaluated in the page; may yield a promise.
 * @param {{label?: string, timeoutMs?: number}} [options] - Log label and overall deadline.
 * @returns {Promise<unknown>} The value the page expression resolved to.
 * @throws {Error} When the page task rejects or the deadline passes.
 */
async function runPageTask(client, expression, { label = "page task", timeoutMs = 30000 } = {}) {
  const taskId = `task_${Date.now()}_${Math.random().toString(16).slice(2)}`;
  const taskKey = JSON.stringify(taskId);

  // The wrapper returns immediately; the outcome is parked on window for polling.
  await client.evaluate(`(() => {
    const taskId = ${taskKey};
    window.__browserSpeakHarnessTasks ||= {};
    window.__browserSpeakHarnessTasks[taskId] = { done: false, label: ${JSON.stringify(label)} };
    Promise.resolve(${expression})
      .then((value) => {
        window.__browserSpeakHarnessTasks[taskId] = { done: true, value };
      })
      .catch((error) => {
        window.__browserSpeakHarnessTasks[taskId] = {
          done: true,
          error: error?.stack || error?.message || String(error),
        };
      });
    return true;
  })()`);

  const deadline = Date.now() + timeoutMs;
  let lastEvents = [];
  let lastPollError = "";
  while (Date.now() < deadline) {
    let task = null;
    try {
      task = await client.evaluate(`window.__browserSpeakHarnessTasks?.[${taskKey}] ?? null`, pollTimeoutMs);
      lastPollError = "";
    } catch (error) {
      const message = error.message ?? String(error);
      // Log each distinct poll failure once instead of on every retry.
      if (message !== lastPollError) {
        console.log(`${label}: waiting for page response (${message})`);
        lastPollError = message;
      }
      await sleep(500);
      continue;
    }

    if (task?.done) {
      await client
        .evaluate(`delete window.__browserSpeakHarnessTasks?.[${taskKey}]`, pollTimeoutMs)
        .catch(() => {});
      if (task.error) throw new Error(`${label} failed: ${task.error}`);
      return task.value;
    }

    const snapshot = await client
      .evaluate(
        `(() => {
          const state = window.browserSpeakBench?.state?.();
          return state
            ? {
                modelsLoaded: state.modelsLoaded,
                modelsLoading: state.modelsLoading,
                micActive: state.micActive,
                activeBenchmark: state.activeBenchmark?.kind ?? null,
                micSeries: state.micSeries,
                events: state.events?.slice(0, 3) ?? [],
              }
            : null;
        })()`,
        pollTimeoutMs,
      )
      .catch(() => null);
    const events = snapshot?.events ?? [];
    if (events.join("\n") !== lastEvents.join("\n")) {
      lastEvents = events;
      if (events[0]) console.log(`${label}: ${events[0]}`);
    }
    await sleep(500);
  }
  throw new Error(`${label} timed out after ${(timeoutMs / 1000).toFixed(0)} seconds.`);
}

/**
 * Spawns Chrome with remote debugging enabled and captures its output.
 *
 * @param {number} port - Remote debugging port.
 * @param {string} profileDir - Directory passed as --user-data-dir.
 * @returns {import("node:child_process").ChildProcess} The child, with an added `browserLog`
 *   string holding the last 8000 characters of stdout/stderr.
 */
function launchBrowser(port, profileDir) {
  const child = spawn(
    chrome,
    [
      ...(headless ? ["--headless=new"] : []),
      "--no-sandbox",
      "--disable-dev-shm-usage",
      "--no-default-browser-check",
      "--no-first-run",
      "--autoplay-policy=no-user-gesture-required",
      ...(autoAcceptMic ? ["--use-fake-ui-for-media-stream"] : []),
      `--remote-debugging-port=${port}`,
      `--user-data-dir=${profileDir}`,
      ...parseChromeArgs(),
    ],
    { stdio: ["ignore", "pipe", "pipe"] },
  );
  child.browserLog = "";
  const appendLog = (chunk) => {
    child.browserLog = `${child.browserLog}${chunk}`;
    if (child.browserLog.length > 8000) child.browserLog = child.browserLog.slice(-8000);
  };
  child.stdout.on("data", appendLog);
  child.stderr.on("data", appendLog);
  // Without a listener, a spawn failure (e.g. binary not on PATH) is an uncaught exception
  // that skips the failure report; record it so connectToPage() can surface it instead.
  child.on("error", (error) => {
    appendLog(`Chrome process error: ${error.message}\n`);
  });
  return child;
}

/**
 * Terminates the browser process and optionally removes its profile directory.
 *
 * @param {import("node:child_process").ChildProcess} child - Child returned by launchBrowser().
 * @param {string | null} profileDir - Directory to delete afterwards, or null to keep it.
 * @returns {Promise<void>}
 */
async function stopBrowser(child, profileDir) {
  // exitCode stays null when the process was ended by a signal, so check both.
  const hasExited = () => child.exitCode != null || child.signalCode != null;

  if (!hasExited()) {
    child.kill("SIGTERM");
    // Wait for the exit event, but no longer than 3 seconds.
    await new Promise((settle) => {
      let timer = null;
      const finish = () => {
        clearTimeout(timer);
        child.off("exit", finish);
        settle();
      };
      timer = setTimeout(finish, 3000);
      child.once("exit", finish);
    });
  }
  if (!hasExited()) child.kill("SIGKILL");
  if (!profileDir) return;

  // Removal is retried because it can fail right after the process is stopped.
  for (let attempt = 0; attempt < 5; attempt += 1) {
    try {
      await rm(profileDir, { recursive: true, force: true });
      return;
    } catch (error) {
      if (attempt === 4) {
        console.warn(`Could not remove ${profileDir}: ${error.message}`);
        return;
      }
      await sleep(500);
    }
  }
}

/**
 * Waits (up to 60 seconds) for the DevTools HTTP endpoint, then opens a page target for `url`.
 *
 * @param {number} port - Remote debugging port.
 * @param {import("node:child_process").ChildProcess} child - Child returned by launchBrowser().
 * @returns {Promise<CdpClient>} Client attached to the new page target.
 * @throws {Error} When Chrome exits early or the endpoint never becomes available.
 */
async function connectToPage(port, child) {
  const deadline = Date.now() + 60000;
  let lastError = null;
  while (Date.now() < deadline) {
    if (child.exitCode != null) {
      throw new Error(`Chrome exited before DevTools became available.\n${child.browserLog}`);
    }
    try {
      const version = await fetch(`http://127.0.0.1:${port}/json/version`).then((response) =>
        response.json(),
      );
      if (version.webSocketDebuggerUrl) {
        const page = await createPageTarget(port);
        return new CdpClient(page.webSocketDebuggerUrl);
      }
    } catch (error) {
      lastError = error;
    }
    await sleep(250);
  }
  throw new Error(
    `Could not connect to Chrome DevTools on port ${port}: ${lastError?.message ?? "unknown error"}\n${child.browserLog}`,
  );
}

/**
 * Creates a page target for `url` through /json/new (PUT first, then GET), falling back to an
 * existing page target whose URL equals `url`.
 *
 * @param {number} port - Remote debugging port.
 * @returns {Promise<object>} Target descriptor containing `webSocketDebuggerUrl`.
 * @throws {Error} When no target could be created or found.
 */
async function createPageTarget(port) {
  for (const method of ["PUT", "GET"]) {
    const response = await fetch(`http://127.0.0.1:${port}/json/new?${encodeURIComponent(url)}`, {
      method,
    }).catch(() => null);
    if (response?.ok) {
      const target = await response.json();
      if (target.webSocketDebuggerUrl) return target;
    }
  }
  const targets = await fetch(`http://127.0.0.1:${port}/json`).then((response) => response.json());
  const page = targets.find((target) => target.type === "page" && target.url === url);
  if (page?.webSocketDebuggerUrl) return page;
  throw new Error("Could not create or find a page target.");
}

/**
 * Minimal DevTools protocol client over the global WebSocket: numbered requests, matched
 * responses, and a per-call timeout.
 */
class CdpClient {
  /**
   * @param {string} webSocketUrl - `webSocketDebuggerUrl` of the target to attach to.
   */
  constructor(webSocketUrl) {
    this.nextId = 1;
    // Maps request id -> { resolve, reject } of the in-flight call.
    this.pending = new Map();
    this.socket = new WebSocket(webSocketUrl);
    this.opened = new Promise((onOpen, onError) => {
      this.socket.onopen = onOpen;
      this.socket.onerror = onError;
      this.socket.onmessage = (event) => this.onMessage(event);
    });
  }

  /**
   * Settles the pending call matching an incoming message; messages without a known id are ignored.
   *
   * @param {{data: string}} event - WebSocket message event carrying a JSON payload.
   */
  onMessage(event) {
    const message = JSON.parse(event.data);
    if (!message.id || !this.pending.has(message.id)) return;
    const { resolve: onResolve, reject } = this.pending.get(message.id);
    this.pending.delete(message.id);
    if (message.error) reject(new Error(message.error.message));
    else onResolve(message.result);
  }

  /**
   * Sends one protocol command and waits for its response.
   *
   * @param {string} method - Protocol method name, e.g. "Runtime.evaluate".
   * @param {object} [params] - Method parameters.
   * @param {number} [timeoutMs] - Time to wait for the response.
   * @returns {Promise<object>} The `result` member of the response.
   * @throws {Error} On a protocol error response or when the timeout elapses.
   */
  async call(method, params = {}, timeoutMs = protocolTimeoutMs) {
    await this.opened;
    const id = this.nextId++;
    this.socket.send(JSON.stringify({ id, method, params }));
    return new Promise((resolvePromise, reject) => {
      const timer = setTimeout(() => {
        this.pending.delete(id);
        reject(new Error(`${method} timed out after ${(timeoutMs / 1000).toFixed(0)} seconds.`));
      }, timeoutMs);
      this.pending.set(id, {
        resolve: (value) => {
          clearTimeout(timer);
          resolvePromise(value);
        },
        reject: (error) => {
          clearTimeout(timer);
          reject(error);
        },
      });
    });
  }

  /**
   * Evaluates an expression in the page, awaiting a returned promise and returning by value.
   *
   * @param {string} expression - JavaScript source to evaluate.
   * @param {number} [timeoutMs] - Time to wait for the response.
   * @returns {Promise<unknown>} The evaluated value.
   * @throws {Error} When the evaluation reports exception details or the call fails.
   */
  async evaluate(expression, timeoutMs = protocolTimeoutMs) {
    const result = await this.call(
      "Runtime.evaluate",
      {
        expression,
        awaitPromise: true,
        returnByValue: true,
      },
      timeoutMs,
    );
    if (result.exceptionDetails) throw new Error(formatException(result.exceptionDetails));
    return result.result.value;
  }

  /**
   * Asks the browser to close; failures are ignored.
   *
   * @returns {Promise<void>}
   */
  async closeBrowser() {
    await this.call("Browser.close").catch(() => {});
  }
}

/**
 * Picks the most descriptive text from Runtime.evaluate exception details.
 *
 * @param {object} details - `exceptionDetails` from the protocol response.
 * @returns {string} Exception description, else its text, else a generic message.
 */
function formatException(details) {
  return details.exception?.description ?? details.text ?? "Evaluation failed.";
}

/**
 * Splits BROWSER_SPEAK_CHROME_ARGS on whitespace.
 *
 * @returns {string[]} Extra Chrome arguments; empty when the variable is unset or blank.
 */
function parseChromeArgs() {
  return (process.env.BROWSER_SPEAK_CHROME_ARGS ?? "")
    .split(/\s+/)
    .map((arg) => arg.trim())
    .filter(Boolean);
}

/**
 * @param {string} arg - A single Chrome command-line argument.
 * @returns {boolean} True when the argument is one of the fake-capture switches checked here.
 */
function isFakeCaptureArg(arg) {
  return (
    arg === "--use-fake-device-for-media-stream" ||
    arg.startsWith("--use-file-for-fake-audio-capture=") ||
    arg.startsWith("--use-file-for-fake-video-capture=")
  );
}

/**
 * Median of the finite numbers in `values`; everything else is ignored.
 *
 * @param {unknown[]} values - Candidate values.
 * @returns {number | null} The median, or null when there are no finite numbers.
 */
function median(values) {
  const finite = values.filter(Number.isFinite).sort((a, b) => a - b);
  if (finite.length === 0) return null;
  const middle = Math.floor(finite.length / 2);
  if (finite.length % 2 === 1) return finite[middle];
  return (finite[middle - 1] + finite[middle]) / 2;
}

/**
 * @param {unknown} value - Duration in milliseconds.
 * @returns {string} "-" for non-finite input, whole milliseconds below 1000, else seconds with 2 decimals.
 */
function formatMs(value) {
  if (!Number.isFinite(value)) return "-";
  if (value < 1000) return `${Math.round(value)} ms`;
  return `${(value / 1000).toFixed(2)} s`;
}

/**
 * @param {unknown} value - Ratio where 1 means 100%.
 * @returns {string} Rounded percentage, or "-" for non-finite input.
 */
function formatPercent(value) {
  if (!Number.isFinite(value)) return "-";
  return `${Math.round(value * 100)}%`;
}

/**
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves after the delay.
 */
function sleep(ms) {
  return new Promise((wake) => setTimeout(wake, ms));
}

main().catch((error) => {
  console.error(error?.stack ?? error?.message ?? String(error));
  process.exitCode = 1;
});