CausalGrok / code /scripts /lib /nohup_runner.sh
nileshsarkar-ai's picture
Upload code/scripts
42c0d23 verified
#!/usr/bin/env bash
# Reusable nohup-detach helper.
# Anything launched through this survives SSH disconnects.
#
# Usage (from another script):
# source scripts/lib/nohup_runner.sh
# launch_detached <run_dir> <command...>
#
# Effect:
# - stdout → <run_dir>/logs/train.log
# - stderr → <run_dir>/logs/train.err
# - PID → <run_dir>/run.pid
# - prints monitoring/stopping commands
#
# Why nohup + setsid + disown:
# nohup — ignore SIGHUP when the controlling terminal closes
# setsid — start a new session so the process detaches from the tty
# disown — remove from the shell's job table so an interactive exit
# does not signal it
# Together this is robust against ssh drops, terminal closes, and
# Ctrl+D logouts.
# Strict mode: abort on unhandled errors, on unset variables, and when any
# pipeline stage fails. This file is meant to be sourced, so these options
# take effect in the sourcing shell as well.
set -o errexit -o nounset -o pipefail
#######################################
# Launch a command detached from the current terminal and record where to
# find it.
# Arguments:
#   $1   - run directory (created if missing; must be non-empty)
#   $2.. - command and its arguments, executed as given (no re-parsing)
# Outputs:
#   Appends a launch header and the command's stdout to
#   <run_dir>/logs/train.log, its stderr to <run_dir>/logs/train.err,
#   writes the background PID to <run_dir>/run.pid, and prints
#   monitoring/stopping hints to stdout. Diagnostics go to stderr.
# Returns:
#   0 once the command has been started in the background,
#   2 on bad usage (missing run_dir or command),
#   127 if nohup or setsid is not available,
#   otherwise the status of the failing setup step (mkdir / log write).
#######################################
launch_detached() {
  # Reject bad usage up front: without a command, "nohup setsid" would fail
  # in the background while we still reported a successful launch, and an
  # empty run_dir would make us write under /logs.
  if (( $# < 2 )) || [[ -z "$1" ]]; then
    printf 'usage: launch_detached <run_dir> <command...>\n' >&2
    return 2
  fi

  # A missing tool would otherwise only surface inside the background job,
  # after a PID for an already-dead process had been recorded.
  local tool
  for tool in nohup setsid; do
    if ! command -v "${tool}" > /dev/null 2>&1; then
      printf 'launch_detached: required tool not found: %s\n' "${tool}" >&2
      return 127
    fi
  done

  local run_dir="$1"
  shift

  local log="${run_dir}/logs/train.log"
  local err="${run_dir}/logs/train.err"
  local pidfile="${run_dir}/run.pid"

  # Explicit "|| return": do not rely on the caller having errexit enabled.
  mkdir -p -- "${run_dir}/logs" || return

  # Shell-quote every argument so the logged command line can be pasted
  # back into a shell verbatim ("$*" would lose argument boundaries).
  local cmd_str
  printf -v cmd_str '%q ' "$@"
  cmd_str="${cmd_str% }"

  # Launch header, appended so earlier runs in the same directory are kept.
  {
    printf '# launched: %s\n' "$(date -u +%FT%TZ)"
    printf '# host: %s\n' "$(hostname)"
    printf '# pwd: %s\n' "$(pwd)"
    printf '# cmd: %s\n' "${cmd_str}"
    printf '%s\n' '----'
  } >> "${log}" || return

  # stdin comes from /dev/null so the job never blocks on a closed terminal.
  # NOTE(review): setsid(1) forks when its caller is a process-group leader
  # (as happens under interactive job control); in that case $! would be the
  # short-lived parent rather than the command. When sourced from a
  # non-interactive script this should not occur - confirm for other uses.
  nohup setsid "$@" >> "${log}" 2>> "${err}" < /dev/null &
  local pid=$!

  # Best effort: disown can fail when job control has no record of the job;
  # that is harmless, so the error is deliberately ignored.
  disown "${pid}" 2> /dev/null || true

  printf '%s\n' "${pid}" > "${pidfile}"

  printf '\n'
  printf ' Detached run:\n'
  printf ' run_dir : %s\n' "${run_dir}"
  printf ' PID : %s\n' "${pid}"
  printf ' follow : tail -f %s\n' "${log}"
  printf ' stop : kill %s\n' "${pid}"
  printf '\n'
}