File size: 1,567 Bytes
42c0d23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env bash
# Reusable nohup-detach helper.
# Anything launched through this survives SSH disconnects.
#
# Usage (from another script):
#   source scripts/lib/nohup_runner.sh
#   launch_detached <run_dir> <command...>
#
# Effect:
#   - stdout → <run_dir>/logs/train.log
#   - stderr → <run_dir>/logs/train.err
#   - PID    → <run_dir>/run.pid
#   - prints monitoring/stopping commands
#
# Why nohup + setsid + disown:
#   nohup   — ignore SIGHUP when the controlling terminal closes
#   setsid  — start a new session so the process detaches from the tty
#   disown  — remove from the shell's job table so an interactive exit
#             does not signal it
# Together this is robust against ssh drops, terminal closes, and
# Ctrl+D logouts.

# Strict mode: abort on command failure, on unset variables, and on failures
# inside pipelines. This file is sourced (see Usage), so the options also
# take effect in the sourcing shell.
set -o errexit -o nounset -o pipefail

#######################################
# Launch a command detached from the current terminal and session.
#
# Arguments:
#   $1   - run directory; created if missing, together with <run_dir>/logs
#   $2.. - command and its arguments, executed through `nohup setsid`
# Outputs:
#   - appends a launch header and the command's stdout to
#     <run_dir>/logs/train.log
#   - appends the command's stderr to <run_dir>/logs/train.err
#   - writes the background PID to <run_dir>/run.pid
#   - prints a summary with follow/stop hints to stdout
# Returns:
#   0   once the command has been started in the background
#   2   on usage error (missing run_dir or command)
#   127 if nohup or setsid is not available on PATH
#   other non-zero if the run directory, log or pidfile cannot be written
#######################################
launch_detached() {
    # Validate up front: without a command, setsid would fail in the
    # background and a dead PID would be reported as a running job.
    if (( $# < 2 )) || [[ -z "$1" ]]; then
        printf 'usage: launch_detached <run_dir> <command...>\n' >&2
        return 2
    fi

    # Fail loudly when a required tool is missing instead of hiding the
    # error inside train.err.
    local tool
    for tool in nohup setsid; do
        if ! command -v "$tool" >/dev/null 2>&1; then
            printf 'launch_detached: required tool not found: %s\n' "$tool" >&2
            return 127
        fi
    done

    local run_dir="$1"; shift
    mkdir -p -- "${run_dir}/logs" || return
    local log="${run_dir}/logs/train.log"
    local err="${run_dir}/logs/train.err"
    local pidfile="${run_dir}/run.pid"

    # Shell-quote every argument so the logged command can be pasted back
    # into a shell verbatim ("$*" would lose argument boundaries).
    local cmd_str
    printf -v cmd_str '%q ' "$@"
    cmd_str=${cmd_str% }

    # Record the exact command for reproducibility.
    {
        printf '# launched: %s\n' "$(date -u +%FT%TZ)"
        printf '# host:     %s\n' "$(hostname)"
        printf '# pwd:      %s\n' "$(pwd)"
        printf '# cmd:      %s\n' "${cmd_str}"
        printf '%s\n' "----"
    } >> "${log}" || return

    # stdin comes from /dev/null so the job never blocks on a closed tty.
    # NOTE(review): setsid forks when it is itself a process-group leader
    # (e.g. a background job started with job control enabled); in that case
    # $! is the short-lived parent, not the command. Confirm callers are
    # non-interactive scripts, as the usage notes suggest.
    nohup setsid "$@" >> "${log}" 2>> "${err}" < /dev/null &
    local pid=$!
    # Best effort on purpose: disown may fail when the job has already exited.
    disown "$pid" 2>/dev/null || true
    printf '%s\n' "$pid" > "${pidfile}" || return

    echo
    echo "  Detached run:"
    echo "    run_dir : ${run_dir}"
    echo "    PID     : ${pid}"
    echo "    follow  : tail -f ${log}"
    echo "    stop    : kill ${pid}"
    echo
}