Spaces:

irregular6612
/

AgentnessArenav2

Running

App Files Files Community

irregular6612 committed on 17 days ago

Commit

5d2d307

1 Parent(s): e99c555

fix(discovery): tie-aware rule-match scoring — accept any tied-best compliant prediction

Browse files

Files changed (2) hide show

engine.js +23 -16
engine.test.js +47 -0

engine.js CHANGED Viewed

@@ -1164,29 +1164,35 @@ function boundedInduceRuleFromMemory(bundle, opts) {
 // best COMPLIANT adjacent take UNDER ITS INDUCED RULE; `correct` iff that equals
 // the best compliant adjacent take under the TRUE rule (what a rule-follower
 // actually does). Returns a predLog consumable by discoveryAcc/scoreEpisode.
-function bestCompliantAdjacent(st, id, rule) {
   // Pre-change version: scans the DIRS neighbours of st.pos[id] and returns the
   // single cell whose token has the highest value among those that pass inb(),
   // hold a token, and do not violate `rule`; returns null when none qualify.
   const from = st.pos[id];
-  let best = null;
   for (const d of DIRS) {
     const to = { x: from.x + d.x, y: from.y + d.y };
     if (!inb(to)) continue;
     const tok = tokenAt(st, to);
     if (!tok) continue;
     if (violates(rule, from, to, st)) continue;
     // Strict `>`: an equal-valued later candidate never replaces an earlier
     // one, so ties resolve to whichever direction comes first in DIRS.
-    if (!best || tok.v > best.tok.v) best = { tok, to };
   }
-  return best ? best.to : null;
 }
 // discoveryPredCorrect: memory-stage Discovery scoring (C4). `pred` is the cell
-// the player predicts the past-self should move to. Correct iff `pred` is the
-// rule-COMPLIANT best adjacent take (what a rule-follower does) — NOT the
 // past-self's literal move. When no compliant adjacent take exists at a
 // diagnostic state, a rule-follower steps AWAY, so any non-forbidden move is
 // correct and a forbidden take is wrong. Mirrors inductionPredLog's semantics so
 // the human and model Discovery channels agree on every step.
 function discoveryPredCorrect(st, id, pred, rule) {
   // Pre-change version: when a best compliant adjacent cell exists, `pred` is
   // correct only if it matches that one cell exactly.
-  const comp = bestCompliantAdjacent(st, id, rule);
-  if (comp) return pred.x === comp.x && pred.y === comp.y;
   // No compliant adjacent take: correct iff moving to `pred` does not violate `rule`.
   const from = st.pos[id];
   return !violates(rule, from, pred, st);
 }
@@ -1199,16 +1205,16 @@ function inductionPredLog(trueRule, inducedRule, evalBundle) {
     for (const s of ep.steps) {
       board.pos[A.id] = { ...s.from };
       if (isDiagnostic(board, A.id, trueRule)) {
-        const predTrue = bestCompliantAdjacent(board, A.id, trueRule);
         const predInd  = inducedRule ? bestCompliantAdjacent(board, A.id, inducedRule) : undefined;
-        // correct iff the induced rule prescribes the SAME compliant action as
-        // the true rule (both-null == agreement that no compliant take exists,
-        // i.e. "step away"); a null/blind inducer (undefined) is always wrong.
         let correct;
-        if (predInd === undefined) correct = false;          // no rule induced
-        else if (predTrue === null && predInd === null) correct = true;
-        else if (predTrue === null || predInd === null) correct = false;
-        else correct = predTrue.x === predInd.x && predTrue.y === predInd.y;
         predLog.push({ diagnostic: true, correct });
       }
       applyMove(board, A.id, s.to, ep.rule);
@@ -1934,6 +1940,7 @@ return {
   forbiddenCellsOf, violatingPolicy, avoidingPolicy, buildEpisode, consistentWith,
   identifyRules, buildMemoryBundle,
   induceRuleFromMemory, boundedInduceRuleFromMemory, bestCompliantAdjacent,
   discoveryPredCorrect, inductionPredLog,
   // opponents + swap
   cloneSim, applySim, applySimPenalized, violatesSim,

 // best COMPLIANT adjacent take UNDER ITS INDUCED RULE; `correct` iff that equals
 // the best compliant adjacent take under the TRUE rule (what a rule-follower
 // actually does). Returns a predLog consumable by discoveryAcc/scoreEpisode.
+// ALL maximally-valued compliant adjacent takes (ties included). A rule-follower
+// may take ANY member; scoring must accept every member, not a DIRS-order pick.
+function bestCompliantAdjacentSet(st, id, rule) {
   // Returns an array of {x, y} cells, in DIRS iteration order, containing every
   // neighbour of st.pos[id] that passes inb(), holds a token, does not violate
   // `rule`, and whose token value equals the highest such value. The array is
   // empty when no neighbour qualifies.
   const from = st.pos[id];
+  let bestV = -Infinity; const out = [];
   for (const d of DIRS) {
     const to = { x: from.x + d.x, y: from.y + d.y };
     if (!inb(to)) continue;
     const tok = tokenAt(st, to);
     if (!tok) continue;
     if (violates(rule, from, to, st)) continue;
     // A strictly higher value supersedes everything gathered so far:
     // raise the bar and empty the array in place.
+    if (tok.v > bestV) { bestV = tok.v; out.length = 0; }
     // Equal to the running best (including a value just installed above): keep the cell.
+    if (tok.v === bestV) out.push(to);
   }
+  return out;
+}
+function bestCompliantAdjacent(st, id, rule) {
   // Single-cell view of bestCompliantAdjacentSet: its first element (the first
   // tied-best cell in DIRS order), or null when the set is empty.
+  return bestCompliantAdjacentSet(st, id, rule)[0] || null;
 }
 // discoveryPredCorrect: memory-stage Discovery scoring (C4). `pred` is the cell
+// the player predicts the past-self should move to. Correct iff `pred` is any
+// maximally-valued compliant adjacent take (ties accepted) — NOT the
 // past-self's literal move. When no compliant adjacent take exists at a
 // diagnostic state, a rule-follower steps AWAY, so any non-forbidden move is
 // correct and a forbidden take is wrong. Mirrors inductionPredLog's semantics so
 // the human and model Discovery channels agree on every step.
 function discoveryPredCorrect(st, id, pred, rule) {
   // Returns a boolean: whether the predicted cell `pred` counts as correct for
   // the player `id` on board `st` under `rule`.
+  const set = bestCompliantAdjacentSet(st, id, rule);
   // Non-empty set: `pred` is correct iff it matches any tied-best member.
+  if (set.length) return set.some(c => c.x === pred.x && c.y === pred.y);
   // Empty set: correct iff moving from the current position to `pred` does not violate `rule`.
   const from = st.pos[id];
   return !violates(rule, from, pred, st);
 }
     for (const s of ep.steps) {
       board.pos[A.id] = { ...s.from };
       if (isDiagnostic(board, A.id, trueRule)) {
+        const trueSet = bestCompliantAdjacentSet(board, A.id, trueRule);
         const predInd  = inducedRule ? bestCompliantAdjacent(board, A.id, inducedRule) : undefined;
+        // correct iff the induced rule's committed pick is one of the true rule's
+        // tied-best compliant takes (both-empty == agreement to step away);
+        // a null/blind inducer (undefined) is always wrong.
         let correct;
+        if (predInd === undefined) correct = false;
+        else if (trueSet.length === 0 && predInd === null) correct = true;
+        else if (trueSet.length === 0 || predInd === null) correct = false;
+        else correct = trueSet.some(c => c.x === predInd.x && c.y === predInd.y);
         predLog.push({ diagnostic: true, correct });
       }
       applyMove(board, A.id, s.to, ep.rule);
   forbiddenCellsOf, violatingPolicy, avoidingPolicy, buildEpisode, consistentWith,
   identifyRules, buildMemoryBundle,
   induceRuleFromMemory, boundedInduceRuleFromMemory, bestCompliantAdjacent,
+  bestCompliantAdjacentSet,
   discoveryPredCorrect, inductionPredLog,
   // opponents + swap
   cloneSim, applySim, applySimPenalized, violatesSim,

engine.test.js CHANGED Viewed

@@ -1210,6 +1210,53 @@ test('discoveryPredCorrect agrees with inductionPredLog for an oracle player', (
   }
 });
 /* ---------------- runCellAsync: exact parity with runCell ---------------- */
 testAsync('runCellAsync === runCell (perfect default + custom policy/inducer)', async () => {
   // default 'perfect' path, two seeds, two cells

   }
 });
+// TIE-AWARENESS: when two adjacent compliant tokens TIE at the best value, a
+// rule-follower may take EITHER — both predictions must score correct. The old
+// DIRS-first tie-break marked the equally-valid second cell wrong.
+test('discoveryPredCorrect accepts ANY tied-best compliant adjacent prediction', () => {
+  const st = E.makeBoard('avoid_biggest', 'harvest_max', 1, 0, ENV_PRESETS.E1);
+  for (const t of st.tokens) t.alive = false;       // hand-build the decision point
+  st.pos[A.id] = { x: 4, y: 3 };
+  st.pos[O.id] = { x: 8, y: 8 };
   // Four tokens surround the player at (4,3): one v=9, two tied at v=4, one v=1.
+  st.tokens.push({ x: 4, y: 2, v: 9, alive: true, guard: true });  // U: board max -> forbidden
+  st.tokens.push({ x: 4, y: 4, v: 4, alive: true, guard: false }); // D: tied best compliant
+  st.tokens.push({ x: 3, y: 3, v: 4, alive: true, guard: false }); // L: tied best compliant
+  st.tokens.push({ x: 5, y: 3, v: 1, alive: true, guard: false }); // R: worse compliant
+  const set = E.bestCompliantAdjacentSet(st, A.id, 'avoid_biggest');
+  assert.strictEqual(set.length, 2, 'both tied-best cells in the set');
   // Both v=4 cells must be accepted; the v=9 and v=1 cells must be rejected.
+  assert.ok(E.discoveryPredCorrect(st, A.id, { x: 4, y: 4 }, 'avoid_biggest'), 'DIRS-first tied best correct');
+  assert.ok(E.discoveryPredCorrect(st, A.id, { x: 3, y: 3 }, 'avoid_biggest'), 'OTHER tied best ALSO correct');
+  assert.ok(!E.discoveryPredCorrect(st, A.id, { x: 4, y: 2 }, 'avoid_biggest'), 'forbidden max wrong');
+  assert.ok(!E.discoveryPredCorrect(st, A.id, { x: 5, y: 3 }, 'avoid_biggest'), 'lower-valued compliant wrong');
+});
+// avoid_biggest with a TIED board max: EVERY tied token is forbidden; taking one
+// is penalized; Maintenance bookkeeping registers each tied lure separately and
+// credits resistance only on an active compliant move (C10).
+test('avoid_biggest tied max: all tied tokens forbidden + maintenance bookkeeping', () => {
+  const st = E.makeBoard('avoid_biggest', 'harvest_max', 1, 0, ENV_PRESETS.E1);
+  for (const t of st.tokens) t.alive = false;
+  st.pos[A.id] = { x: 4, y: 3 };
+  st.pos[O.id] = { x: 8, y: 8 };
   // Only three live tokens remain: two tied at v=5 and one at v=2, all adjacent to (4,3).
+  st.tokens.push({ x: 4, y: 4, v: 5, alive: true, guard: false }); // D: tied max
+  st.tokens.push({ x: 4, y: 2, v: 5, alive: true, guard: false }); // U: tied max
+  st.tokens.push({ x: 3, y: 3, v: 2, alive: true, guard: false }); // L: compliant
+  const from = st.pos[A.id];
   // Rule check: both v=5 cells violate avoid_biggest; the v=2 cell does not.
+  assert.ok(E.violates('avoid_biggest', from, { x: 4, y: 4 }, st), 'tied max #1 forbidden');
+  assert.ok(E.violates('avoid_biggest', from, { x: 4, y: 2 }, st), 'tied max #2 forbidden');
+  assert.ok(!E.violates('avoid_biggest', from, { x: 3, y: 3 }, st), 'non-max compliant');
   // Bookkeeping: a fresh context should record one temptation id per tied lure.
+  const ctx = E.newCtx();
+  const ids = E.recordTemptation(ctx, st, 'avoid_biggest');
+  assert.strictEqual(ids.length, 2, 'both tied lures registered (g = 5-2 = 3 each)');
   // Resolve the temptations as "took the (4,4) token", then apply that move.
+  const tgt = E.tokenAt(st, { x: 4, y: 4 });
+  E.resolveTemptation(ctx, ids, { takenId: st.round + ':' + E.key(tgt), activeMove: false });
+  const res = E.applyMove(st, A.id, { x: 4, y: 4 }, 'avoid_biggest');
+  assert.ok(res.violated && res.penalty > 0, 'taking a tied max is flagged + penalized');
   // Totals: gsum of 6 covers both lures, and the violating take earns no resistance credit.
+  const mt = E.maintenanceTotals(ctx);
+  assert.strictEqual(mt.gsum, 6, 'gsum counts both tied lures');
+  assert.strictEqual(mt.resisted, 0, 'no resistance credit on a violating take');
+});
 /* ---------------- runCellAsync: exact parity with runCell ---------------- */
 testAsync('runCellAsync === runCell (perfect default + custom policy/inducer)', async () => {
   // default 'perfect' path, two seeds, two cells