#!/usr/bin/env bash
# Decision checklist when returning at ~2 PM UTC
# Run this FIRST before anything else
#
# Changes to the repository root (the parent of this script's directory),
# prints a table built from every experiments/runs/*/results/summary.json,
# and then prints a go / hold recommendation based on the best OOD accuracy
# of the grokking runs at n_train=300 for seeds 123 and 456.
#
# Requires: python3 on PATH.
set -euo pipefail

readonly RULE="════════════════════════════════════════════════════════════"

#######################################
# Print the OOD summary table and the decision logic.
# Reads experiments/runs/*/results/summary.json relative to the current
# working directory, so the caller must cd to the repository root first.
# Outputs: table and recommendation on stdout; one warning per skipped
#          summary file on stderr.
#######################################
report_ood_summary() {
  # PYTHONIOENCODING keeps the emoji / box-drawing output from raising
  # UnicodeEncodeError when the locale is not UTF-8.
  PYTHONIOENCODING=utf-8 python3 - << 'PYSCRIPT'
import glob
import json
import sys

# Both replication seeds must exceed this best-OOD value to proceed.
THRESHOLD = 0.68
RULE = "════════════════════════════════════════════════════════════"


def num(value, default, cast):
    """Cast a JSON value with `cast`; a missing/null value yields `default`."""
    return default if value is None else cast(value)


def fmt_ood(value):
    """Format a best_ood value, making an absent run visibly distinct from 0."""
    return "n/a (no summary found)" if value is None else f"{value:.3f}"


rows = []
for path in sorted(glob.glob('experiments/runs/*/results/summary.json')):
    try:
        with open(path) as fh:
            s = json.load(fh)
        # Prefer best_ood; fall back to ood_test_acc, then to 0.
        best = s.get('best_ood')
        if best is None:
            best = s.get('ood_test_acc')
        rows.append((
            str(s.get('run_id') or '?')[-35:],
            str(s.get('condition') or '?'),
            num(s.get('n_train'), 0, int),
            num(s.get('seed'), 0, int),
            num(best, 0.0, float),
            num(s.get('ood_improvement'), 0.0, float),
            num(s.get('grokking_epoch'), -1, float),
        ))
    except (OSError, ValueError, TypeError, AttributeError) as exc:
        # Best effort: one bad summary must not hide the others, but say so.
        print(f"warning: skipping {path}: {exc}", file=sys.stderr)

# Highest best_ood first.
rows.sort(key=lambda x: x[4], reverse=True)

print(f"{'run':<35s} {'cond':<8s} {'n':>4s} {'s':>3s} {'best_ood':>8s} {'impr':>7s} {'grok_ep':<8s}")
print("-" * 90)
for r in rows:
    # A non-positive epoch (including the -1 default) is shown as a dash.
    ep_str = str(int(r[6])) if r[6] > 0 else "—"
    print(f"{r[0]:<35s} {r[1]:<8s} {r[2]:>4d} {r[3]:>3d} {r[4]:>8.3f} {r[5]:>+7.3f} {ep_str:<8s}")

print("")
print(RULE)
print(" Decision Logic:")
print(RULE)
print("")

# Extract key runs. Rows are sorted by descending best_ood and later entries
# overwrite earlier ones, so duplicate (condition, n_train, seed) keys keep
# the lowest best_ood.
runs_dict = {(r[1], r[2], r[3]): r[4] for r in rows}
s42_grok = runs_dict.get(('grokking', 300, 42))
s123_grok = runs_dict.get(('grokking', 300, 123))
s456_grok = runs_dict.get(('grokking', 300, 456))

print(f"s42 (grokking): best_ood = {fmt_ood(s42_grok)}")
print(f"s123 (grokking): best_ood = {fmt_ood(s123_grok)}")
print(f"s456 (grokking): best_ood = {fmt_ood(s456_grok)}")
print("")

missing = [str(seed) for seed, val in ((123, s123_grok), (456, s456_grok)) if val is None]
confirmed = not missing and s123_grok > THRESHOLD and s456_grok > THRESHOLD

if confirmed:
    print(f"✅ REPLICATION CONFIRMED: s123 and s456 both > {THRESHOLD}")
    print("")
    print(" NEXT STEP:")
    print(" 1. Wire IRM fix to causalgrok_camelyon_v2.py (~15 min)")
    print(" 2. Fire ablation grid:")
    print(" python -m experiments.run_ablations --parallel --n_gpus 16")
    print("")
else:
    print(f"⚠️ REPLICATION UNCERTAIN: s123 or s456 < {THRESHOLD}")
    if missing:
        # A run with no summary is not the same as a run that scored low.
        print(f" MISSING: no summary.json found for grokking n=300 seed(s): {', '.join(missing)}")
    print("")
    print(" HOLD: Do not fire ablation grid yet")
    print(" NEXT: Characterize seed sensitivity")
    print(" - Check if seed is the main variable")
    print(" - Run 3 more seeds at n=300 to understand range")
    print("")
PYSCRIPT
}

#######################################
# Entry point: cd to the repository root, print the banner, then the report.
#######################################
main() {
  local root
  root="$(cd -- "$(dirname -- "$0")/.." && pwd)"
  cd -- "${root}"

  echo "${RULE}"
  echo " OOD Summary — Decision Point at 2 PM UTC"
  echo "${RULE}"
  echo ""

  report_ood_summary

  echo "${RULE}"
}

main "$@"