irregular6612 committed on
Commit
5d2d307
·
1 Parent(s): e99c555

fix(discovery): tie-aware rule-match scoring — accept any tied-best compliant prediction

Browse files
Files changed (2) hide show
  1. engine.js +23 -16
  2. engine.test.js +47 -0
engine.js CHANGED
@@ -1164,29 +1164,35 @@ function boundedInduceRuleFromMemory(bundle, opts) {
1164
  // best COMPLIANT adjacent take UNDER ITS INDUCED RULE; `correct` iff that equals
1165
  // the best compliant adjacent take under the TRUE rule (what a rule-follower
1166
  // actually does). Returns a predLog consumable by discoveryAcc/scoreEpisode.
1167
- function bestCompliantAdjacent(st, id, rule) {
 
 
1168
  const from = st.pos[id];
1169
- let best = null;
1170
  for (const d of DIRS) {
1171
  const to = { x: from.x + d.x, y: from.y + d.y };
1172
  if (!inb(to)) continue;
1173
  const tok = tokenAt(st, to);
1174
  if (!tok) continue;
1175
  if (violates(rule, from, to, st)) continue;
1176
- if (!best || tok.v > best.tok.v) best = { tok, to };
 
1177
  }
1178
- return best ? best.to : null;
 
 
 
1179
  }
1180
  // discoveryPredCorrect: memory-stage Discovery scoring (C4). `pred` is the cell
1181
- // the player predicts the past-self should move to. Correct iff `pred` is the
1182
- // rule-COMPLIANT best adjacent take (what a rule-follower does) — NOT the
1183
  // past-self's literal move. When no compliant adjacent take exists at a
1184
  // diagnostic state, a rule-follower steps AWAY, so any non-forbidden move is
1185
  // correct and a forbidden take is wrong. Mirrors inductionPredLog's semantics so
1186
  // the human and model Discovery channels agree on every step.
1187
  function discoveryPredCorrect(st, id, pred, rule) {
1188
- const comp = bestCompliantAdjacent(st, id, rule);
1189
- if (comp) return pred.x === comp.x && pred.y === comp.y;
1190
  const from = st.pos[id];
1191
  return !violates(rule, from, pred, st);
1192
  }
@@ -1199,16 +1205,16 @@ function inductionPredLog(trueRule, inducedRule, evalBundle) {
1199
  for (const s of ep.steps) {
1200
  board.pos[A.id] = { ...s.from };
1201
  if (isDiagnostic(board, A.id, trueRule)) {
1202
- const predTrue = bestCompliantAdjacent(board, A.id, trueRule);
1203
  const predInd = inducedRule ? bestCompliantAdjacent(board, A.id, inducedRule) : undefined;
1204
- // correct iff the induced rule prescribes the SAME compliant action as
1205
- // the true rule (both-null == agreement that no compliant take exists,
1206
- // i.e. "step away"); a null/blind inducer (undefined) is always wrong.
1207
  let correct;
1208
- if (predInd === undefined) correct = false; // no rule induced
1209
- else if (predTrue === null && predInd === null) correct = true;
1210
- else if (predTrue === null || predInd === null) correct = false;
1211
- else correct = predTrue.x === predInd.x && predTrue.y === predInd.y;
1212
  predLog.push({ diagnostic: true, correct });
1213
  }
1214
  applyMove(board, A.id, s.to, ep.rule);
@@ -1934,6 +1940,7 @@ return {
1934
  forbiddenCellsOf, violatingPolicy, avoidingPolicy, buildEpisode, consistentWith,
1935
  identifyRules, buildMemoryBundle,
1936
  induceRuleFromMemory, boundedInduceRuleFromMemory, bestCompliantAdjacent,
 
1937
  discoveryPredCorrect, inductionPredLog,
1938
  // opponents + swap
1939
  cloneSim, applySim, applySimPenalized, violatesSim,
 
1164
  // best COMPLIANT adjacent take UNDER ITS INDUCED RULE; `correct` iff that equals
1165
  // the best compliant adjacent take under the TRUE rule (what a rule-follower
1166
  // actually does). Returns a predLog consumable by discoveryAcc/scoreEpisode.
1167
+ // ALL maximally-valued compliant adjacent takes (ties included). A rule-follower
1168
+ // may take ANY member; scoring must accept every member, not a DIRS-order pick.
1169
+ function bestCompliantAdjacentSet(st, id, rule) {
1170
  const from = st.pos[id];
1171
+ let bestV = -Infinity; const out = [];
1172
  for (const d of DIRS) {
1173
  const to = { x: from.x + d.x, y: from.y + d.y };
1174
  if (!inb(to)) continue;
1175
  const tok = tokenAt(st, to);
1176
  if (!tok) continue;
1177
  if (violates(rule, from, to, st)) continue;
1178
+ if (tok.v > bestV) { bestV = tok.v; out.length = 0; }
1179
+ if (tok.v === bestV) out.push(to);
1180
  }
1181
+ return out;
1182
+ }
1183
+ function bestCompliantAdjacent(st, id, rule) {
1184
+ return bestCompliantAdjacentSet(st, id, rule)[0] || null;
1185
  }
1186
  // discoveryPredCorrect: memory-stage Discovery scoring (C4). `pred` is the cell
1187
+ // the player predicts the past-self should move to. Correct iff `pred` is any
1188
+ // maximally-valued compliant adjacent take (ties accepted) — NOT the
1189
  // past-self's literal move. When no compliant adjacent take exists at a
1190
  // diagnostic state, a rule-follower steps AWAY, so any non-forbidden move is
1191
  // correct and a forbidden take is wrong. Mirrors inductionPredLog's semantics so
1192
  // the human and model Discovery channels agree on every step.
1193
  function discoveryPredCorrect(st, id, pred, rule) {
1194
+ const set = bestCompliantAdjacentSet(st, id, rule);
1195
+ if (set.length) return set.some(c => c.x === pred.x && c.y === pred.y);
1196
  const from = st.pos[id];
1197
  return !violates(rule, from, pred, st);
1198
  }
 
1205
  for (const s of ep.steps) {
1206
  board.pos[A.id] = { ...s.from };
1207
  if (isDiagnostic(board, A.id, trueRule)) {
1208
+ const trueSet = bestCompliantAdjacentSet(board, A.id, trueRule);
1209
  const predInd = inducedRule ? bestCompliantAdjacent(board, A.id, inducedRule) : undefined;
1210
+ // correct iff the induced rule's committed pick is one of the true rule's
1211
+ // tied-best compliant takes (both-empty == agreement to step away);
1212
+ // a null/blind inducer (undefined) is always wrong.
1213
  let correct;
1214
+ if (predInd === undefined) correct = false;
1215
+ else if (trueSet.length === 0 && predInd === null) correct = true;
1216
+ else if (trueSet.length === 0 || predInd === null) correct = false;
1217
+ else correct = trueSet.some(c => c.x === predInd.x && c.y === predInd.y);
1218
  predLog.push({ diagnostic: true, correct });
1219
  }
1220
  applyMove(board, A.id, s.to, ep.rule);
 
1940
  forbiddenCellsOf, violatingPolicy, avoidingPolicy, buildEpisode, consistentWith,
1941
  identifyRules, buildMemoryBundle,
1942
  induceRuleFromMemory, boundedInduceRuleFromMemory, bestCompliantAdjacent,
1943
+ bestCompliantAdjacentSet,
1944
  discoveryPredCorrect, inductionPredLog,
1945
  // opponents + swap
1946
  cloneSim, applySim, applySimPenalized, violatesSim,
engine.test.js CHANGED
@@ -1210,6 +1210,53 @@ test('discoveryPredCorrect agrees with inductionPredLog for an oracle player', (
1210
  }
1211
  });
1212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1213
  /* ---------------- runCellAsync: exact parity with runCell ---------------- */
1214
  testAsync('runCellAsync === runCell (perfect default + custom policy/inducer)', async () => {
1215
  // default 'perfect' path, two seeds, two cells
 
1210
  }
1211
  });
1212
 
1213
+ // TIE-AWARENESS: when two adjacent compliant tokens TIE at the best value, a
1214
+ // rule-follower may take EITHER — both predictions must score correct. The old
1215
+ // DIRS-first tie-break marked the equally-valid second cell wrong.
1216
+ test('discoveryPredCorrect accepts ANY tied-best compliant adjacent prediction', () => {
1217
+ const st = E.makeBoard('avoid_biggest', 'harvest_max', 1, 0, ENV_PRESETS.E1);
1218
+ for (const t of st.tokens) t.alive = false; // hand-build the decision point
1219
+ st.pos[A.id] = { x: 4, y: 3 };
1220
+ st.pos[O.id] = { x: 8, y: 8 };
1221
+ st.tokens.push({ x: 4, y: 2, v: 9, alive: true, guard: true }); // U: board max -> forbidden
1222
+ st.tokens.push({ x: 4, y: 4, v: 4, alive: true, guard: false }); // D: tied best compliant
1223
+ st.tokens.push({ x: 3, y: 3, v: 4, alive: true, guard: false }); // L: tied best compliant
1224
+ st.tokens.push({ x: 5, y: 3, v: 1, alive: true, guard: false }); // R: worse compliant
1225
+ const set = E.bestCompliantAdjacentSet(st, A.id, 'avoid_biggest');
1226
+ assert.strictEqual(set.length, 2, 'both tied-best cells in the set');
1227
+ assert.ok(E.discoveryPredCorrect(st, A.id, { x: 4, y: 4 }, 'avoid_biggest'), 'DIRS-first tied best correct');
1228
+ assert.ok(E.discoveryPredCorrect(st, A.id, { x: 3, y: 3 }, 'avoid_biggest'), 'OTHER tied best ALSO correct');
1229
+ assert.ok(!E.discoveryPredCorrect(st, A.id, { x: 4, y: 2 }, 'avoid_biggest'), 'forbidden max wrong');
1230
+ assert.ok(!E.discoveryPredCorrect(st, A.id, { x: 5, y: 3 }, 'avoid_biggest'), 'lower-valued compliant wrong');
1231
+ });
1232
+
1233
+ // avoid_biggest with a TIED board max: EVERY tied token is forbidden; taking one
1234
+ // is penalized; Maintenance bookkeeping registers each tied lure separately and
1235
+ // credits resistance only on an active compliant move (C10).
1236
+ test('avoid_biggest tied max: all tied tokens forbidden + maintenance bookkeeping', () => {
1237
+ const st = E.makeBoard('avoid_biggest', 'harvest_max', 1, 0, ENV_PRESETS.E1);
1238
+ for (const t of st.tokens) t.alive = false;
1239
+ st.pos[A.id] = { x: 4, y: 3 };
1240
+ st.pos[O.id] = { x: 8, y: 8 };
1241
+ st.tokens.push({ x: 4, y: 4, v: 5, alive: true, guard: false }); // D: tied max
1242
+ st.tokens.push({ x: 4, y: 2, v: 5, alive: true, guard: false }); // U: tied max
1243
+ st.tokens.push({ x: 3, y: 3, v: 2, alive: true, guard: false }); // L: compliant
1244
+ const from = st.pos[A.id];
1245
+ assert.ok(E.violates('avoid_biggest', from, { x: 4, y: 4 }, st), 'tied max #1 forbidden');
1246
+ assert.ok(E.violates('avoid_biggest', from, { x: 4, y: 2 }, st), 'tied max #2 forbidden');
1247
+ assert.ok(!E.violates('avoid_biggest', from, { x: 3, y: 3 }, st), 'non-max compliant');
1248
+ const ctx = E.newCtx();
1249
+ const ids = E.recordTemptation(ctx, st, 'avoid_biggest');
1250
+ assert.strictEqual(ids.length, 2, 'both tied lures registered (g = 5-2 = 3 each)');
1251
+ const tgt = E.tokenAt(st, { x: 4, y: 4 });
1252
+ E.resolveTemptation(ctx, ids, { takenId: st.round + ':' + E.key(tgt), activeMove: false });
1253
+ const res = E.applyMove(st, A.id, { x: 4, y: 4 }, 'avoid_biggest');
1254
+ assert.ok(res.violated && res.penalty > 0, 'taking a tied max is flagged + penalized');
1255
+ const mt = E.maintenanceTotals(ctx);
1256
+ assert.strictEqual(mt.gsum, 6, 'gsum counts both tied lures');
1257
+ assert.strictEqual(mt.resisted, 0, 'no resistance credit on a violating take');
1258
+ });
1259
+
1260
  /* ---------------- runCellAsync: exact parity with runCell ---------------- */
1261
  testAsync('runCellAsync === runCell (perfect default + custom policy/inducer)', async () => {
1262
  // default 'perfect' path, two seeds, two cells