#!/usr/bin/env bash
# Submit OCR + visualization (NemotronOCRV2) on a GPU node from scratch clone.
#
# Usage:
#   ./submit_inference_viz_gpu.sh /path/to/images [OUTPUT_DIR]
#
# Relative IMAGES_DIR / OUTPUT_DIR values are resolved against the directory
# this script is invoked from, because the generated job changes into $BASE
# before running inference.
#
# Environment (optional):
#   MODEL       — checkpoint directory (default: $BASE/v2_multilingual)
#   BATCH_SIZE  — v2 detector batch size (default: 16)
#   MERGE_LEVEL — word|sentence|paragraph (default: sentence)
#   SLURM_TIME  — wall time in minutes (default: 120)
#   JOB_NAME    — sbatch job name (default: infer_viz_scratch)

set -euo pipefail

BASE="/lustre/fsw/portfolios/datascience/users/rchesler/scratch/nemotron-ocr-v2"
VENV="$BASE/nemotron-ocr/.venv"
ACCOUNT="datascience_nemo_retriever"
PARTITIONS="batch_block1,batch_block3,batch_block4"

# Print $1 as an absolute path, anchoring relative paths at $PWD.
# Purely textual: the path does not have to exist.
abs_path() {
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "$PWD/$1" ;;
  esac
}

IMAGES="${1:?usage: $0 IMAGES_DIR [OUTPUT_DIR]}"
IMAGES="$(abs_path "$IMAGES")"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
OUT="${2:-$BASE/inference_output/viz_v2_${TIMESTAMP}}"
OUT="$(abs_path "$OUT")"
MODEL="${MODEL:-$BASE/v2_multilingual}"
BATCH_SIZE="${BATCH_SIZE:-16}"
MERGE_LEVEL="${MERGE_LEVEL:-sentence}"
SLURM_TIME="${SLURM_TIME:-120}"
JOB_NAME="${JOB_NAME:-infer_viz_scratch}"

LOGS="$OUT/logs"
mkdir -p "$LOGS"

# Shell-quote every value that is spliced into the generated job script as a
# command word, so spaces or shell metacharacters in a path or setting cannot
# split or alter the command when the job runs.
printf -v q_base '%q' "$BASE"
printf -v q_activate '%q' "$VENV/bin/activate"
printf -v q_model '%q' "$MODEL"
printf -v q_images '%q' "$IMAGES"
printf -v q_out '%q' "$OUT"
printf -v q_batch_size '%q' "$BATCH_SIZE"
printf -v q_merge_level '%q' "$MERGE_LEVEL"

# Environment setup executed at the top of the job. Single-quoted on purpose:
# $HOME and $PATH must be expanded on the compute node, not at submit time.
PREAMBLE='source /etc/profile.d/modules.sh 2>/dev/null || true
if command -v module >/dev/null 2>&1; then
  module load cuda12.2/toolkit/12.2.2 || true
fi
export PATH="$HOME/.local/bin:$PATH"
source '"$q_activate"

JOB="$OUT/.job_inference_${TIMESTAMP}.sh"

# Unquoted heredoc: ${...} is expanded now (submit time); "\\" yields a literal
# line-continuation backslash and "\$" defers expansion to job run time.
cat > "$JOB" << ENDSCRIPT
#!/bin/bash
#SBATCH --job-name=${JOB_NAME}
#SBATCH --account=${ACCOUNT}
#SBATCH --partition=${PARTITIONS}
#SBATCH --nodes=1
#SBATCH --gpus-per-node=1
#SBATCH --time=${SLURM_TIME}
#SBATCH --output=${LOGS}/infer_%j.out
#SBATCH --error=${LOGS}/infer_%j.err

set -euo pipefail
${PREAMBLE}
cd ${q_base}

python run_ocr_inference.py \\
  --model_dir ${q_model} \\
  --images_dir ${q_images} \\
  --output_dir ${q_out} \\
  --pipeline v2 \\
  --batch_size ${q_batch_size} \\
  --num_samples 0 \\
  --merge_level ${q_merge_level}

echo "Done: \$(date)"
ENDSCRIPT
chmod +x "$JOB"

echo "============================================================"
echo " Inference + viz (v2 pipeline) — GPU job (scratch clone)"
echo "============================================================"
echo " images: $IMAGES"
echo " model: $MODEL"
echo " merge: $MERGE_LEVEL"
echo " output: $OUT"
echo " logs: $LOGS"
echo " job file: $JOB"
echo "============================================================"

# Capture only stdout; sbatch's stderr goes straight to the terminal so the
# user sees the scheduler's own diagnostics. The "|| status=$?" form records a
# failure without tripping `set -e`.
sbatch_status=0
sbatch_out=$(sbatch "$JOB") || sbatch_status=$?

# sbatch announces success as "Submitted batch job <id>".
job_id_re='Submitted batch job ([0-9]+)'
if ((sbatch_status == 0)) && [[ "$sbatch_out" =~ $job_id_re ]]; then
  JID="${BASH_REMATCH[1]}"
  echo " submitted job ID: $JID"
  echo " tail: tail -f $LOGS/infer_${JID}.out"
else
  echo " sbatch failed or did not return a job id" >&2
  if [[ -n "$sbatch_out" ]]; then
    printf ' sbatch output: %s\n' "$sbatch_out" >&2
  fi
  exit 1
fi