Spaces:
Running
Running
Commit ·
5d2d307
1
Parent(s): e99c555
fix(discovery): tie-aware rule-match scoring — accept any tied-best compliant prediction
Browse files
- engine.js +23 -16
- engine.test.js +47 -0
engine.js
CHANGED
|
@@ -1164,29 +1164,35 @@ function boundedInduceRuleFromMemory(bundle, opts) {
|
|
| 1164 |
// best COMPLIANT adjacent take UNDER ITS INDUCED RULE; `correct` iff that equals
|
| 1165 |
// the best compliant adjacent take under the TRUE rule (what a rule-follower
|
| 1166 |
// actually does). Returns a predLog consumable by discoveryAcc/scoreEpisode.
|
| 1167 |
-
|
|
|
|
|
|
|
| 1168 |
const from = st.pos[id];
|
| 1169 |
-
let
|
| 1170 |
for (const d of DIRS) {
|
| 1171 |
const to = { x: from.x + d.x, y: from.y + d.y };
|
| 1172 |
if (!inb(to)) continue;
|
| 1173 |
const tok = tokenAt(st, to);
|
| 1174 |
if (!tok) continue;
|
| 1175 |
if (violates(rule, from, to, st)) continue;
|
| 1176 |
-
if (
|
|
|
|
| 1177 |
}
|
| 1178 |
-
return
|
|
|
|
|
|
|
|
|
|
| 1179 |
}
|
| 1180 |
// discoveryPredCorrect: memory-stage Discovery scoring (C4). `pred` is the cell
|
| 1181 |
-
// the player predicts the past-self should move to. Correct iff `pred` is
|
| 1182 |
-
//
|
| 1183 |
// past-self's literal move. When no compliant adjacent take exists at a
|
| 1184 |
// diagnostic state, a rule-follower steps AWAY, so any non-forbidden move is
|
| 1185 |
// correct and a forbidden take is wrong. Mirrors inductionPredLog's semantics so
|
| 1186 |
// the human and model Discovery channels agree on every step.
|
| 1187 |
function discoveryPredCorrect(st, id, pred, rule) {
|
| 1188 |
-
const
|
| 1189 |
-
if (
|
| 1190 |
const from = st.pos[id];
|
| 1191 |
return !violates(rule, from, pred, st);
|
| 1192 |
}
|
|
@@ -1199,16 +1205,16 @@ function inductionPredLog(trueRule, inducedRule, evalBundle) {
|
|
| 1199 |
for (const s of ep.steps) {
|
| 1200 |
board.pos[A.id] = { ...s.from };
|
| 1201 |
if (isDiagnostic(board, A.id, trueRule)) {
|
| 1202 |
-
const
|
| 1203 |
const predInd = inducedRule ? bestCompliantAdjacent(board, A.id, inducedRule) : undefined;
|
| 1204 |
-
// correct iff the induced rule
|
| 1205 |
-
//
|
| 1206 |
-
//
|
| 1207 |
let correct;
|
| 1208 |
-
if (predInd === undefined) correct = false;
|
| 1209 |
-
else if (
|
| 1210 |
-
else if (
|
| 1211 |
-
else correct =
|
| 1212 |
predLog.push({ diagnostic: true, correct });
|
| 1213 |
}
|
| 1214 |
applyMove(board, A.id, s.to, ep.rule);
|
|
@@ -1934,6 +1940,7 @@ return {
|
|
| 1934 |
forbiddenCellsOf, violatingPolicy, avoidingPolicy, buildEpisode, consistentWith,
|
| 1935 |
identifyRules, buildMemoryBundle,
|
| 1936 |
induceRuleFromMemory, boundedInduceRuleFromMemory, bestCompliantAdjacent,
|
|
|
|
| 1937 |
discoveryPredCorrect, inductionPredLog,
|
| 1938 |
// opponents + swap
|
| 1939 |
cloneSim, applySim, applySimPenalized, violatesSim,
|
|
|
|
| 1164 |
// best COMPLIANT adjacent take UNDER ITS INDUCED RULE; `correct` iff that equals
|
| 1165 |
// the best compliant adjacent take under the TRUE rule (what a rule-follower
|
| 1166 |
// actually does). Returns a predLog consumable by discoveryAcc/scoreEpisode.
|
| 1167 |
+
// bestCompliantAdjacentSet: every compliant adjacent take that ties for the
// highest token value. A rule-follower may take ANY member of this set, so
// scoring has to accept each one instead of a single DIRS-order pick.
//   st   - board state; st.pos[id] is read as the mover's current cell
//   id   - key of the mover inside st.pos
//   rule - rule handed to violates() to filter out non-compliant takes
// Returns the tied-best target cells ({x, y}) in DIRS order; the array is
// empty when no neighbouring cell holds a token that may be taken under `rule`.
function bestCompliantAdjacentSet(st, id, rule) {
  const origin = st.pos[id];
  let topValue = -Infinity;
  let winners = [];
  for (const step of DIRS) {
    const cell = { x: origin.x + step.x, y: origin.y + step.y };
    if (!inb(cell)) continue;
    const token = tokenAt(st, cell);
    // No token to take, or taking it would break the rule: not a candidate.
    if (!token || violates(rule, origin, cell, st)) continue;
    if (token.v > topValue) {
      // Strictly better than everything seen so far: restart the tie list.
      topValue = token.v;
      winners = [cell];
    } else if (token.v === topValue) {
      winners.push(cell);
    }
  }
  return winners;
}
|
| 1183 |
+
// bestCompliantAdjacent: the single committed pick - the first member of the
// tied-best compliant set, or null when that set is empty.
function bestCompliantAdjacent(st, id, rule) {
  const ties = bestCompliantAdjacentSet(st, id, rule);
  const first = ties[0];
  return first || null;
}
|
| 1186 |
// discoveryPredCorrect: memory-stage Discovery scoring (C4).
//   st   - board state at the decision point
//   id   - key of the past-self in st.pos
//   pred - the cell ({x, y}) the player predicts the past-self should move to
//   rule - the rule the prediction is scored against
// A prediction is correct when it names ANY maximally-valued compliant
// adjacent take (ties accepted) - NOT necessarily the past-self's literal move.
// When no compliant adjacent take exists at a diagnostic state, a rule-follower
// steps AWAY: any non-forbidden move is then correct and a forbidden take is
// wrong. Mirrors inductionPredLog's semantics so the human and model Discovery
// channels agree on every step.
function discoveryPredCorrect(st, id, pred, rule) {
  const ties = bestCompliantAdjacentSet(st, id, rule);
  if (ties.length === 0) {
    // Nothing compliant to take: only a rule-breaking move counts as wrong.
    return !violates(rule, st.pos[id], pred, st);
  }
  for (const cell of ties) {
    if (cell.x === pred.x && cell.y === pred.y) return true;
  }
  return false;
}
|
|
|
|
| 1205 |
for (const s of ep.steps) {
|
| 1206 |
board.pos[A.id] = { ...s.from };
|
| 1207 |
if (isDiagnostic(board, A.id, trueRule)) {
|
| 1208 |
+
const trueSet = bestCompliantAdjacentSet(board, A.id, trueRule);
|
| 1209 |
const predInd = inducedRule ? bestCompliantAdjacent(board, A.id, inducedRule) : undefined;
|
| 1210 |
+
// correct iff the induced rule's committed pick is one of the true rule's
|
| 1211 |
+
// tied-best compliant takes (both-empty == agreement to step away);
|
| 1212 |
+
// a null/blind inducer (undefined) is always wrong.
|
| 1213 |
let correct;
|
| 1214 |
+
if (predInd === undefined) correct = false;
|
| 1215 |
+
else if (trueSet.length === 0 && predInd === null) correct = true;
|
| 1216 |
+
else if (trueSet.length === 0 || predInd === null) correct = false;
|
| 1217 |
+
else correct = trueSet.some(c => c.x === predInd.x && c.y === predInd.y);
|
| 1218 |
predLog.push({ diagnostic: true, correct });
|
| 1219 |
}
|
| 1220 |
applyMove(board, A.id, s.to, ep.rule);
|
|
|
|
| 1940 |
forbiddenCellsOf, violatingPolicy, avoidingPolicy, buildEpisode, consistentWith,
|
| 1941 |
identifyRules, buildMemoryBundle,
|
| 1942 |
induceRuleFromMemory, boundedInduceRuleFromMemory, bestCompliantAdjacent,
|
| 1943 |
+
bestCompliantAdjacentSet,
|
| 1944 |
discoveryPredCorrect, inductionPredLog,
|
| 1945 |
// opponents + swap
|
| 1946 |
cloneSim, applySim, applySimPenalized, violatesSim,
|
engine.test.js
CHANGED
|
@@ -1210,6 +1210,53 @@ test('discoveryPredCorrect agrees with inductionPredLog for an oracle player', (
|
|
| 1210 |
}
|
| 1211 |
});
|
| 1212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1213 |
/* ---------------- runCellAsync: exact parity with runCell ---------------- */
|
| 1214 |
testAsync('runCellAsync === runCell (perfect default + custom policy/inducer)', async () => {
|
| 1215 |
// default 'perfect' path, two seeds, two cells
|
|
|
|
| 1210 |
}
|
| 1211 |
});
|
| 1212 |
|
| 1213 |
+
// TIE-AWARENESS: if two adjacent compliant tokens TIE at the best value, a
// rule-follower may take EITHER one, so both predictions must score correct.
// The previous DIRS-first tie-break marked the equally-valid second cell wrong.
test('discoveryPredCorrect accepts ANY tied-best compliant adjacent prediction', () => {
  const board = E.makeBoard('avoid_biggest', 'harvest_max', 1, 0, ENV_PRESETS.E1);
  // Clear the generated tokens so the decision point below is fully hand-built.
  board.tokens.forEach((t) => { t.alive = false; });
  board.pos[A.id] = { x: 4, y: 3 };
  board.pos[O.id] = { x: 8, y: 8 };
  board.tokens.push(
    { x: 4, y: 2, v: 9, alive: true, guard: true },  // U: board max -> forbidden
    { x: 4, y: 4, v: 4, alive: true, guard: false }, // D: tied best compliant
    { x: 3, y: 3, v: 4, alive: true, guard: false }, // L: tied best compliant
    { x: 5, y: 3, v: 1, alive: true, guard: false }, // R: worse compliant
  );

  const ties = E.bestCompliantAdjacentSet(board, A.id, 'avoid_biggest');
  assert.strictEqual(ties.length, 2, 'both tied-best cells in the set');

  const down = E.discoveryPredCorrect(board, A.id, { x: 4, y: 4 }, 'avoid_biggest');
  assert.ok(down, 'DIRS-first tied best correct');
  const left = E.discoveryPredCorrect(board, A.id, { x: 3, y: 3 }, 'avoid_biggest');
  assert.ok(left, 'OTHER tied best ALSO correct');
  const up = E.discoveryPredCorrect(board, A.id, { x: 4, y: 2 }, 'avoid_biggest');
  assert.ok(!up, 'forbidden max wrong');
  const right = E.discoveryPredCorrect(board, A.id, { x: 5, y: 3 }, 'avoid_biggest');
  assert.ok(!right, 'lower-valued compliant wrong');
});
|
| 1232 |
+
|
| 1233 |
+
// avoid_biggest with a TIED board max: EVERY tied token is forbidden and taking
// one is penalized. Maintenance bookkeeping registers each tied lure separately
// and credits resistance only on an active compliant move (C10).
test('avoid_biggest tied max: all tied tokens forbidden + maintenance bookkeeping', () => {
  const board = E.makeBoard('avoid_biggest', 'harvest_max', 1, 0, ENV_PRESETS.E1);
  // Clear the generated tokens so only the hand-placed ones below are alive.
  board.tokens.forEach((t) => { t.alive = false; });
  board.pos[A.id] = { x: 4, y: 3 };
  board.pos[O.id] = { x: 8, y: 8 };
  board.tokens.push(
    { x: 4, y: 4, v: 5, alive: true, guard: false }, // D: tied max
    { x: 4, y: 2, v: 5, alive: true, guard: false }, // U: tied max
    { x: 3, y: 3, v: 2, alive: true, guard: false }, // L: compliant
  );

  const origin = board.pos[A.id];
  assert.ok(E.violates('avoid_biggest', origin, { x: 4, y: 4 }, board), 'tied max #1 forbidden');
  assert.ok(E.violates('avoid_biggest', origin, { x: 4, y: 2 }, board), 'tied max #2 forbidden');
  assert.ok(!E.violates('avoid_biggest', origin, { x: 3, y: 3 }, board), 'non-max compliant');

  const ctx = E.newCtx();
  const lureIds = E.recordTemptation(ctx, board, 'avoid_biggest');
  assert.strictEqual(lureIds.length, 2, 'both tied lures registered (g = 5-2 = 3 each)');

  // Resolve the temptation as "took the lower tied max", then actually make that move.
  const taken = E.tokenAt(board, { x: 4, y: 4 });
  const takenId = board.round + ':' + E.key(taken);
  E.resolveTemptation(ctx, lureIds, { takenId, activeMove: false });
  const outcome = E.applyMove(board, A.id, { x: 4, y: 4 }, 'avoid_biggest');
  assert.ok(outcome.violated && outcome.penalty > 0, 'taking a tied max is flagged + penalized');

  const totals = E.maintenanceTotals(ctx);
  assert.strictEqual(totals.gsum, 6, 'gsum counts both tied lures');
  assert.strictEqual(totals.resisted, 0, 'no resistance credit on a violating take');
});
|
| 1259 |
+
|
| 1260 |
/* ---------------- runCellAsync: exact parity with runCell ---------------- */
|
| 1261 |
testAsync('runCellAsync === runCell (perfect default + custom policy/inducer)', async () => {
|
| 1262 |
// default 'perfect' path, two seeds, two cells
|