#!/usr/bin/env bash
#
# Post-processing pipeline: run after all Camelyon17 experiments complete.
#
# Steps:
#   1. Generate paper figures from run histories (experiments.plot_results).
#   2. Run M1 mechanistic-interpretability analysis (experiments.mechinterp_m1).
#   3. Print a summary table built from each run's results/summary.json.
#
# Usage: bash scripts/post_process.sh
set -euo pipefail

# Run from the repository root (the parent of this script's directory) so the
# relative paths used below resolve regardless of the caller's cwd.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "${ROOT}"

source scripts/lib/env.sh
resolve_python
# NOTE(review): resolve_python is defined outside this file and is presumably
# what sets PYTHON; fail with a clear message if it did not.
: "${PYTHON:?resolve_python did not set PYTHON}"

readonly RULE="════════════════════════════════════════════════════════════"

echo "${RULE}"
echo " CausalGrok Post-Processing Pipeline"
echo "${RULE}"

# Step 1: Generate all figures from run histories
echo ""
echo "[1/3] Generating paper figures from run histories..."
"${PYTHON}" -m experiments.plot_results --save_dir paper_figures
echo " ✓ Figures saved to paper_figures/"

# Step 2: Run M1 mechanistic interpretability on all runs
echo ""
echo "[2/3] Running M1 analysis (layer-wise probing)..."
"${PYTHON}" -m experiments.mechinterp_m1 \
  --all_runs \
  --data_root data/wilds \
  --latest_only
echo " ✓ M1 analysis complete"

# Step 3: Print summary table
echo ""
echo "[3/3] Final results summary:"
echo ""
# The heredoc delimiter is quoted so the shell performs no expansion inside
# the Python program.
"${PYTHON}" << 'EOF'
import glob
import json
import sys
from pathlib import Path

SUMMARY_GLOB = "experiments/runs/*camelyon*/results/summary.json"


def number_or(value, default):
    """Return `value` as a float, treating a missing/null value as `default`."""
    return float(default if value is None else value)


print(f"{'Run ID':<50} | {'OOD Best':>8} | {'Improvement':>11} | {'Grokking Epoch':>15}")
print("-" * 100)

results = []
for summary_path in sorted(glob.glob(SUMMARY_GLOB)):
    try:
        with open(summary_path, encoding="utf-8") as fh:
            summary = json.load(fh)
        # Run ID is the directory two levels above summary.json, truncated to
        # fit the 50-character column.
        rid = Path(summary_path).parent.parent.name[:48]
        best_ood = number_or(summary.get("best_ood"), 0)
        impr = number_or(summary.get("ood_improvement"), 0)
        grok_ep = number_or(summary.get("grokking_epoch"), -1)
        # A non-positive epoch is rendered as a dash.
        ep_str = str(int(grok_ep)) if grok_ep > 0 else "—"
    except (OSError, ValueError, TypeError, AttributeError, OverflowError) as exc:
        # Best-effort: one unreadable or malformed summary must not abort the
        # table, but it should not vanish silently either.
        print(f"warning: skipping {summary_path}: {exc}", file=sys.stderr)
        continue

    # Record the run only after every field parsed, so the printed table and
    # the best-run selection below always cover the same set of runs.
    results.append((rid, best_ood, impr, grok_ep))
    # Width 11 matches the "Improvement" header column.
    print(f"{rid:<50} | {best_ood:>8.3f} | {impr:>+11.3f} | {ep_str:>15}")

print("")
if results:
    best_rid, best_ood, _, _ = max(results, key=lambda row: row[1])
    print(f"Best OOD performance: {best_rid} ({best_ood:.3f})")
else:
    print(f"warning: no usable summaries matched {SUMMARY_GLOB}", file=sys.stderr)
EOF

echo ""
echo "${RULE}"
echo " Post-processing complete!"
echo "${RULE}"