/* ========================================================================= llm/spectate.js — watch an LLM play the SAME game a human plays. Classic script loaded AFTER app.js: shares its global bindings (G, A, startLive, memPredict, humanMove, memCurrentBoard) and uses window.OBSERVE + window.PROVIDERS. The LLM goes through the human path (memPredict / humanMove), so Discovery and Maintenance are measured exactly like a human run and the report is rendered by the existing UI. providers in the browser: anthropic (CORS opt-in header), ollama (set OLLAMA_ORIGINS to allow this page origin), mock (no key; oracle memory predictions + first-compliant-step live policy — for testing). ========================================================================= */ 'use strict'; (function () { const OBS = window.OBSERVE, PROV = window.PROVIDERS, ENGINE = window.ENGINE; // ---- config panel ------------------------------------------------------- const panel = document.createElement('div'); panel.id = 'llmPanel'; panel.innerHTML = '

' + '🤖 ' + '' + '' + '' + '' + ' ☁ cloud' + '' + '' + '

' + '

' + // left column '

History

' + '

' + // right column '

Current Chat

' + '

idle

' + '

'; // append to the very bottom of #app (below the board) — the chat panel is the // AI player's workspace, shown only in AI mode (gated by #app[data-mode] in CSS). document.getElementById('app').appendChild(panel); const $ = (id) => document.getElementById(id); for (const id of ['llmProvider', 'llmModel', 'llmKey']) { // persist locally $(id).value = localStorage.getItem('arena.' + id) || $(id).value; $(id).addEventListener('change', () => localStorage.setItem('arena.' + id, $(id).value)); } // cloud toggle (checkbox uses .checked, persisted as '1'/'0'); only meaningful for ollama. $('llmCloud').checked = localStorage.getItem('arena.llmCloud') === '1'; $('llmCloud').addEventListener('change', () => localStorage.setItem('arena.llmCloud', $('llmCloud').checked ? '1' : '0')); const syncCloudEnabled = () => { const isOllama = $('llmProvider').value === 'ollama'; $('llmCloud').disabled = !isOllama; $('llmCloudWrap').style.opacity = isOllama ? '1' : '0.4'; }; $('llmProvider').addEventListener('change', syncCloudEnabled); syncCloudEnabled(); const status = (s) => { $('llmStatus').textContent = s; }; const sleep = (ms) => new Promise(r => setTimeout(r, ms)); let running = false; let turnSeq = 0; let currentTurn = null; const llmTurns = []; window.LLM_TURNS = llmTurns; const esc = (s) => String(s || '').replace(/[&<>"']/g, (c) => ({ '&': '&', '<': '<', '>': '>', '"': '"', "'": ''' })[c]); const renderTurn = (t, open) => '

' + '

#' + t.id + ' ' + esc(t.stage) + ' / ' + esc(t.label) + ' ' + esc(t.status) + '

' + '

input

' + esc(t.input) + '

' + '

think

' + esc(t.thinking) + '

' + '

output

' + esc(t.response) + '

' + '

'; // remember which history turns the user expanded, so re-renders keep them open // (the History pane otherwise re-collapses every turn boundary). const openHistory = new Set(); $('llmHistoryBody').addEventListener('toggle', (e) => { const d = e.target; if (!d.dataset || d.dataset.tid == null) return; if (d.open) openHistory.add(+d.dataset.tid); else openHistory.delete(+d.dataset.tid); }, true); // current chat updates every stream delta; history only changes at turn // boundaries, so split them — never rebuild history mid-stream (that was // clobbering a user-expanded

on every token). const renderCurrent = () => { $('llmCurrentBody').className = currentTurn ? '' : 'llmEmpty'; $('llmCurrentBody').innerHTML = currentTurn ? renderTurn(currentTurn, true) : 'idle'; }; const renderHistory = () => { $('llmHistoryBody').innerHTML = llmTurns.filter(t => t !== currentTurn).slice(-12) .reverse().map((t) => renderTurn(t, openHistory.has(t.id))).join(''); }; const renderTurns = () => { renderCurrent(); renderHistory(); }; const askLlm = async (llm, stage, label, prompt) => { const rec = { id: ++turnSeq, stage, label, status: 'streaming', input: prompt, thinking: '', response: '', promptChars: prompt.length, startedAt: new Date().toISOString() }; currentTurn = rec; renderTurns(); const sync = (out) => { if (out) { rec.thinking = out.thinking || rec.thinking; rec.response = out.content || rec.response; } renderCurrent(); }; let out; if (llm.completeStream) { out = await llm.completeStream(prompt, { onThinking(delta) { rec.thinking += delta; renderCurrent(); }, onContent(delta) { rec.response += delta; renderCurrent(); }, }); sync(out); } else if (llm.completeDetailed) { out = await llm.completeDetailed(prompt); sync(out); } else { out = { content: await llm.complete(prompt), thinking: '' }; sync(out); } rec.status = 'done'; rec.endedAt = new Date().toISOString(); llmTurns.push(rec); renderTurns(); return rec.response; }; $('llmGo').addEventListener('click', () => { if (running) { running = false; $('llmGo').textContent = 'watch ▶'; return; } running = true; $('llmGo').textContent = 'stop ■'; drive() .catch(e => status('error: ' + (e && e.message || e))) .finally(() => { running = false; $('llmGo').textContent = 'watch ▶'; }); }); // ---- mock policies (no key; for demo/testing the spectate plumbing) ----- function mockMemDir() { // oracle: predict the replay's actual step const tr = G.mem.trajs[G.mem.ti]; const from = memCurrentBoard().pos[A.id], to = tr.steps[G.mem.si].to; return { x: to.x - from.x, y: to.y - from.y }; } function mockLiveDir() { // first inbounds compliant step (verified policy) const st = G.live.st, from = st.pos[A.id]; let fallback = null; for (const d of [{x:1,y:0},{x:0,y:1},{x:-1,y:0},{x:0,y:-1}]) { const to = { x: from.x + d.x, y: from.y + d.y }; if (!ENGINE.inb(to)) continue; fallback = fallback || d; if (!ENGINE.violates(G.live.ruleA, from, to, st)) return d; } return fallback; } // ---- the driver loop ----------------------------------------------------- async function drive() { const cfg = { provider: $('llmProvider').value, model: $('llmModel').value, apiKey: $('llmKey').value, baseUrl: 'http://127.0.0.1:11434', cloud: $('llmCloud').checked }; const llm = cfg.provider === 'mock' ? null : PROV.makeProvider(cfg); let hypothesis = llm ? '(none yet)' : '(mock)'; let hypothesisAsked = false; if (G.stage === 'idle' || G.stage === 'report') $('startBtn').click(); while (running && G.stage !== 'report') { if (G.stage === 'memory') { if (G.mem.reveal) { await sleep(150); continue; } let dir; if (!llm) { status('① memory: mock oracle predicting next step…'); dir = mockMemDir(); } else { status('① memory: LLM predicting the past self’s next step…'); const prompt = OBS.buildMemoryPredictPrompt({ boardText: OBS.renderBoardText(memCurrentBoard()), historyText: OBS.renderMemoryHistory(G.mem.trajs, G.mem.ti, G.mem.si), }); dir = OBS.parseMove(await askLlm(llm, 'memory', 'predict move', prompt)) || { x: 1, y: 0 }; } if (!running || G.stage !== 'memory' || G.mem.reveal) continue; memPredict(dir); await sleep(780); // reveal window is 700ms } else if (G.stage === 'live') { if (llm && !hypothesisAsked) { hypothesisAsked = true; status('forming rule hypothesis…'); hypothesis = (await askLlm(llm, 'live', 'rule hypothesis', OBS.buildHypothesisPrompt( OBS.renderMemoryHistory(G.mem.trajs, G.mem.trajs.length, 0)))).trim(); } if (G.live.turn !== A.id) { await sleep(120); continue; } let dir; if (!llm) { status('② live: mock playing compliant step…'); dir = mockLiveDir(); } else { status('② live: LLM thinking… [rule hypothesis: ' + hypothesis.slice(0, 90) + ']'); const prompt = OBS.buildLivePrompt({ st: G.live.st, goal: G.goal, hypothesis }); dir = OBS.parseMove(await askLlm(llm, 'live', 'choose move', prompt)) || { x: 1, y: 0 }; } if (!running || G.stage !== 'live' || G.live.turn !== A.id) continue; if (!dir) { await sleep(120); continue; } // defensive: no valid move found humanMove(dir); await sleep(220); // bot answers after 140ms } else { await sleep(150); } } if (G.stage === 'report') { // Surface the agentness verdict in our OWN status line. app.js draws the // persistent report on the canvas and uses #hint only for a transient line // that its per-frame drawReport() overwrites with a restart prompt — so we // re-read the score globals here for a stable, readable agentness readout. let verdict = ''; try { if (typeof computeScores === 'function' && typeof reportText === 'function') verdict = reportText(computeScores()); } catch (e) { /* fall back to hypothesis-only status below */ } status(verdict ? '③ ' + verdict + ' | rule hypothesis: ' + hypothesis.slice(0, 90) : '③ report ready (above). rule hypothesis: ' + hypothesis.slice(0, 140)); } } })();