/* =========================================================================
llm/spectate.js — watch an LLM play the SAME game a human plays.
Classic script loaded AFTER app.js: shares its global bindings (G, A,
startLive, memPredict, humanMove, memCurrentBoard) and uses window.OBSERVE
+ window.PROVIDERS. The LLM goes through the human path (memPredict /
humanMove), so Discovery and Maintenance are measured exactly like a
human run and the report is rendered by the existing UI.
providers in the browser: anthropic (CORS opt-in header), ollama
(set OLLAMA_ORIGINS to allow this page origin), mock (no key; oracle
memory predictions + first-compliant-step live policy — for testing).
========================================================================= */
'use strict';
(function () {
// Collaborators published on window by scripts loaded before this one.
const OBS = window.OBSERVE;
const PROV = window.PROVIDERS;
const ENGINE = window.ENGINE;
// ---- config panel -------------------------------------------------------
// Build the LLM panel: a <div id="llmPanel"> whose markup is assigned through
// innerHTML and which is then appended to #app. The rest of this file looks up
// these ids and therefore expects the markup to provide them: llmProvider,
// llmModel, llmKey, llmCloud, llmCloudWrap, llmStatus, llmGo, llmHistoryBody
// and llmCurrentBody.
// NOTE(review): in this view the string literals below appear to have had their
// HTML tags stripped (only the visible text "History", "Current Chat" and
// "idle" remains) — confirm against the version-controlled file before editing.
const panel = document.createElement('div');
panel.id = 'llmPanel';
panel.innerHTML =
'
' +
'' +
'' +
'' +
'' +
'' +
'' +
'
' +
'' +
'
' +
'' + // left column
'
History
' +
'' +
'' +
'' + // right column
'
Current Chat
' +
'
idle
' +
'' +
'
';
// append to the very bottom of #app (below the board) — the chat panel is the
// AI player's workspace, shown only in AI mode (gated by #app[data-mode] in CSS).
document.getElementById('app').appendChild(panel);
// Shorthand element lookup by id.
const $ = (id) => document.getElementById(id);
// Persist the provider/model/key fields locally: restore any saved value now
// (keeping the field's own value when nothing is stored) and save on change.
['llmProvider', 'llmModel', 'llmKey'].forEach((id) => {
  const field = $(id);
  const storageKey = 'arena.' + id;
  field.value = localStorage.getItem(storageKey) || field.value;
  field.addEventListener('change', () => {
    localStorage.setItem(storageKey, field.value);
  });
});
// Cloud checkbox: stored under 'arena.llmCloud' as '1' (checked) or '0'.
// It is only meaningful for the ollama provider, so it is disabled and dimmed
// whenever another provider is selected.
$('llmCloud').checked = localStorage.getItem('arena.llmCloud') === '1';
$('llmCloud').addEventListener('change', () => {
  const flag = $('llmCloud').checked ? '1' : '0';
  localStorage.setItem('arena.llmCloud', flag);
});
const syncCloudEnabled = () => {
  const ollamaSelected = $('llmProvider').value === 'ollama';
  $('llmCloud').disabled = !ollamaSelected;
  if (ollamaSelected) {
    $('llmCloudWrap').style.opacity = '1';
  } else {
    $('llmCloudWrap').style.opacity = '0.4';
  }
};
$('llmProvider').addEventListener('change', syncCloudEnabled);
syncCloudEnabled();
// Write a one-line message into the panel's status element (#llmStatus).
const status = (s) => {
  const line = $('llmStatus');
  line.textContent = s;
};
// Promise that resolves (with no value) after roughly `ms` milliseconds.
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
// Shared run state for the panel.
// running: true while a spectate run is active; flipped by the #llmGo click
// handler and polled by drive(), which stops looping once it is false.
let running = false;
// turnSeq: counter incremented by askLlm to give every turn record its id.
let turnSeq = 0;
// currentTurn: the most recently started turn record (null until the first
// askLlm call); rendered in the current-chat pane and left out of History.
let currentTurn = null;
// llmTurns: log of turn records, appended by askLlm.
const llmTurns = [];
// Also published on window under LLM_TURNS (same array object, not a copy).
window.LLM_TURNS = llmTurns;
// HTML-escape a value before it is interpolated into innerHTML markup.
// Falsy input (null, undefined, '') becomes the empty string; anything else is
// stringified and has & < > " ' replaced by the matching character entity, so
// prompt text and model output can never be parsed as markup.
const esc = (s) => {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return String(s || '').replace(/[&<>"']/g, (ch) => entities[ch]);
};
// Build the markup string for one turn record `t`: a heading made of the turn
// id, stage, label and status, followed by three labelled sections (input,
// think, output). Every text field goes through esc(); t.id is inserted as-is.
// NOTE(review): the `open` argument is not referenced anywhere in the text
// visible here, and the string literals look as if their HTML tags were
// stripped in this view. Presumably the real markup carries the open state and
// the data-tid attribute that the History toggle listener reads — confirm
// against the version-controlled file before editing.
const renderTurn = (t, open) =>
'' +
'#' + t.id + ' ' + esc(t.stage) + ' / ' + esc(t.label) +
' ' + esc(t.status) + '' +
'
input
' + esc(t.input) + '
' +
'
think
' + esc(t.thinking) + '
' +
'
output
' + esc(t.response) + '
' +
'';
// Ids of the History turns the user has expanded. renderHistory consults this
// set so a re-render at a turn boundary keeps those turns open instead of
// collapsing them again.
const openHistory = new Set();
// Listen in the capture phase (third argument `true`) on the History container;
// only targets that carry a data-tid attribute are tracked.
$('llmHistoryBody').addEventListener('toggle', (event) => {
  const node = event.target;
  const tid = node.dataset ? node.dataset.tid : null;
  if (tid == null) return;
  const key = Number(tid);
  if (node.open) {
    openHistory.add(key);
  } else {
    openHistory.delete(key);
  }
}, true);
// current chat updates every stream delta; history only changes at turn
// boundaries, so split them — never rebuild history mid-stream (that was
// clobbering a user-expanded <details> on every token).
// Repaint the current-chat pane: the active turn rendered expanded, or the
// 'idle' placeholder (with the llmEmpty class) when there is no turn yet.
const renderCurrent = () => {
  const pane = $('llmCurrentBody');
  if (currentTurn) {
    pane.className = '';
    pane.innerHTML = renderTurn(currentTurn, true);
  } else {
    pane.className = 'llmEmpty';
    pane.innerHTML = 'idle';
  }
};
// Repaint the History pane: the 12 most recent logged turns other than the
// current one, newest first, each expanded only if the user had opened it.
const renderHistory = () => {
  const past = llmTurns.filter((turn) => turn !== currentTurn);
  const newestFirst = past.slice(-12).reverse();
  const pieces = newestFirst.map((turn) => renderTurn(turn, openHistory.has(turn.id)));
  $('llmHistoryBody').innerHTML = pieces.join('');
};
// Full repaint, used at turn boundaries: current pane first, then History.
const renderTurns = () => {
  renderCurrent();
  renderHistory();
};
// Send one prompt to the provider and record the exchange as a "turn".
//   llm    - provider object; the richest available method is used, in order:
//            completeStream(prompt, { onThinking, onContent }),
//            completeDetailed(prompt), then complete(prompt).
//   stage  - stage name stored on the turn record (e.g. 'memory', 'live').
//   label  - short description of the request, stored on the record.
//   prompt - prompt text sent to the provider.
// Resolves to the response text accumulated on the record.
// If the provider call rejects, the turn is still closed out (status 'error',
// endedAt set, error message kept, record logged and repainted) and the
// original error is rethrown, so a failed turn never stays 'streaming'.
const askLlm = async (llm, stage, label, prompt) => {
  const rec = { id: ++turnSeq, stage, label, status: 'streaming',
    input: prompt, thinking: '', response: '', promptChars: prompt.length,
    startedAt: new Date().toISOString() };
  currentTurn = rec;
  renderTurns();
  // Fold a provider's final result into the record; fields the result leaves
  // empty keep whatever the stream callbacks already accumulated.
  const sync = (out) => {
    if (out) {
      rec.thinking = out.thinking || rec.thinking;
      rec.response = out.content || rec.response;
    }
    renderCurrent();
  };
  try {
    let out;
    if (llm.completeStream) {
      // Only the current pane is repainted per delta; History is left alone
      // until the turn boundary.
      out = await llm.completeStream(prompt, {
        onThinking(delta) { rec.thinking += delta; renderCurrent(); },
        onContent(delta) { rec.response += delta; renderCurrent(); },
      });
      sync(out);
    } else if (llm.completeDetailed) {
      out = await llm.completeDetailed(prompt);
      sync(out);
    } else {
      out = { content: await llm.complete(prompt), thinking: '' };
      sync(out);
    }
    rec.status = 'done';
  } catch (err) {
    rec.status = 'error';
    rec.error = String((err && err.message) || err);
    throw err;
  } finally {
    // Runs on success and on failure, so every turn ends up logged.
    rec.endedAt = new Date().toISOString();
    llmTurns.push(rec);
    renderTurns();
  }
  return rec.response;
};
// Start/stop toggle for a spectate run.
// drive() only notices `running === false` at its next check, which can be a
// long time away while it awaits an LLM response. So on a stop request the
// button is disabled until that in-flight run has actually settled. Otherwise
// a quick second click would set `running` back to true, reviving the old loop
// and starting a second driver beside it.
$('llmGo').addEventListener('click', () => {
  const goBtn = $('llmGo');
  if (running) {
    running = false;
    goBtn.disabled = true; // re-enabled in the finally below
    goBtn.textContent = 'watch ▶';
    return;
  }
  running = true;
  goBtn.textContent = 'stop ■';
  drive()
    .catch((e) => status('error: ' + (e && e.message || e)))
    .finally(() => {
      running = false;
      goBtn.disabled = false;
      goBtn.textContent = 'watch ▶';
    });
});
// ---- mock policies (no key; for demo/testing the spectate plumbing) -----
// Mock memory policy (oracle): returns the delta from the agent's position on
// the current memory board to the `to` cell of the replay step being shown.
function mockMemDir() {
  const replay = G.mem.trajs[G.mem.ti];
  const here = memCurrentBoard().pos[A.id];
  const next = replay.steps[G.mem.si].to;
  const dx = next.x - here.x;
  const dy = next.y - here.y;
  return { x: dx, y: dy };
}
// Mock live policy: try the four unit steps in a fixed order (+x, +y, -x, -y)
// and return the first one that stays in bounds and does not violate the
// agent's rule. If every in-bounds step violates, return the first in-bounds
// step; if none is in bounds, return null.
function mockLiveDir() {
  const board = G.live.st;
  const here = board.pos[A.id];
  const steps = [{ x: 1, y: 0 }, { x: 0, y: 1 }, { x: -1, y: 0 }, { x: 0, y: -1 }];
  let firstInBounds = null;
  for (let i = 0; i < steps.length; i += 1) {
    const step = steps[i];
    const dest = { x: here.x + step.x, y: here.y + step.y };
    if (ENGINE.inb(dest)) {
      if (firstInBounds === null) firstInBounds = step;
      if (!ENGINE.violates(G.live.ruleA, here, dest, board)) return step;
    }
  }
  return firstInBounds;
}
// ---- the driver loop -----------------------------------------------------
// Driver loop: plays the game through the same entry points a human uses
// (memPredict in the memory stage, humanMove in the live stage) until the game
// reaches the 'report' stage or `running` is switched off. With the 'mock'
// provider no LLM is created and the mock policies supply the moves.
// Rejects if a provider call throws; the click handler reports that via status().
async function drive() {
// Snapshot the panel settings once, at the start of the run.
// NOTE(review): baseUrl is hard-coded to a local address — presumably only the
// ollama provider reads it; confirm in PROVIDERS.makeProvider.
const cfg = { provider: $('llmProvider').value, model: $('llmModel').value,
apiKey: $('llmKey').value, baseUrl: 'http://127.0.0.1:11434',
cloud: $('llmCloud').checked };
// llm === null selects the mock policies in both stages below.
const llm = cfg.provider === 'mock' ? null : PROV.makeProvider(cfg);
let hypothesis = llm ? '(none yet)' : '(mock)';
// The rule hypothesis is requested at most once per run (first live iteration).
let hypothesisAsked = false;
// Start (or restart) a game through the existing start button.
if (G.stage === 'idle' || G.stage === 'report') $('startBtn').click();
while (running && G.stage !== 'report') {
if (G.stage === 'memory') {
// While a reveal is showing, just poll until it ends.
if (G.mem.reveal) { await sleep(150); continue; }
let dir;
if (!llm) { status('① memory: mock oracle predicting next step…'); dir = mockMemDir(); }
else {
status('① memory: LLM predicting the past self’s next step…');
const prompt = OBS.buildMemoryPredictPrompt({
boardText: OBS.renderBoardText(memCurrentBoard()),
historyText: OBS.renderMemoryHistory(G.mem.trajs, G.mem.ti, G.mem.si),
});
// A reply that parseMove cannot turn into a move falls back to { x: 1, y: 0 }.
dir = OBS.parseMove(await askLlm(llm, 'memory', 'predict move', prompt)) || { x: 1, y: 0 };
}
// Re-check after the await: the run may have been stopped or the game state
// may have moved on while the LLM was answering.
if (!running || G.stage !== 'memory' || G.mem.reveal) continue;
memPredict(dir);
await sleep(780); // reveal window is 700ms
} else if (G.stage === 'live') {
if (llm && !hypothesisAsked) {
hypothesisAsked = true;
status('forming rule hypothesis…');
// Built from the memory history rendered with index G.mem.trajs.length, step 0.
hypothesis = (await askLlm(llm, 'live', 'rule hypothesis',
OBS.buildHypothesisPrompt(
OBS.renderMemoryHistory(G.mem.trajs, G.mem.trajs.length, 0)))).trim();
}
// Not this player's turn yet: poll.
if (G.live.turn !== A.id) { await sleep(120); continue; }
let dir;
if (!llm) { status('② live: mock playing compliant step…'); dir = mockLiveDir(); }
else {
status('② live: LLM thinking… [rule hypothesis: ' + hypothesis.slice(0, 90) + ']');
const prompt = OBS.buildLivePrompt({ st: G.live.st, goal: G.goal, hypothesis });
// Same fallback move as in the memory stage when the reply cannot be parsed.
dir = OBS.parseMove(await askLlm(llm, 'live', 'choose move', prompt)) || { x: 1, y: 0 };
}
// Re-check after the await, as in the memory stage.
if (!running || G.stage !== 'live' || G.live.turn !== A.id) continue;
// Only mockLiveDir() can yield a falsy dir here (it returns null when no
// step is in bounds); the LLM path always has the fallback move.
if (!dir) { await sleep(120); continue; } // defensive: no valid move found
humanMove(dir);
await sleep(220); // bot answers after 140ms
} else {
// Any other stage: wait for the game to move on.
await sleep(150);
}
}
if (G.stage === 'report') {
// Surface the agentness verdict in our OWN status line. app.js draws the
// persistent report on the canvas and uses #hint only for a transient line
// that its per-frame drawReport() overwrites with a restart prompt — so we
// re-read the score globals here for a stable, readable agentness readout.
let verdict = '';
try {
// computeScores / reportText are optional: used only when both are defined.
if (typeof computeScores === 'function' && typeof reportText === 'function')
verdict = reportText(computeScores());
} catch (e) { /* fall back to hypothesis-only status below */ }
status(verdict
? '③ ' + verdict + ' | rule hypothesis: ' + hypothesis.slice(0, 90)
: '③ report ready (above). rule hypothesis: ' + hypothesis.slice(0, 140));
}
}
})();