""" utils.run_dir — per-experiment directory contract. Every training invocation creates its own directory under experiments/runs// with the following layout. This is the single source of truth for that contract. experiments/runs/__n_s/ config.json # frozen config used for this run run.pid # PID of the detached process (written by launcher) logs/ train.log train.err results/ history.json # incremental per-checkpoint metrics summary.json # final test acc, grokking epoch, etc. checkpoints/ final.pt figures/ training_curves.png """ from __future__ import annotations import json import os from datetime import datetime, timezone from typing import Iterable DEFAULT_BASE = os.path.join("experiments", "runs") SUBDIRS = ("logs", "results", "checkpoints", "figures") def make_run_dir(run_id_parts: Iterable[str], base: str = DEFAULT_BASE): """ Create experiments/runs/_<...parts>/ and all standard subdirs. Returns (run_dir, run_id). Example: make_run_dir(["grokking", "n500", "s42"]) → ("experiments/runs/20260430-141500_grokking_n500_s42", "20260430-141500_grokking_n500_s42") """ stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S") run_id = "_".join([stamp, *run_id_parts]) run_dir = os.path.join(base, run_id) ensure_run_dir(run_dir) return run_dir, run_id def ensure_run_dir(run_dir: str): for sub in SUBDIRS: os.makedirs(os.path.join(run_dir, sub), exist_ok=True) def save_config(cfg: dict, run_dir: str): with open(os.path.join(run_dir, "config.json"), "w") as f: json.dump(cfg, f, indent=2)