#!/usr/bin/env bash
#
# Submit a single-node, single-GPU Slurm job that installs the nemotron-ocr
# package from the scratch clone at $PKG and then smoke-tests the imports.
#
# The script writes a timestamped batch file under $BASE, hands it to sbatch,
# and prints the job ID together with a `tail -f` command for the job's
# stdout log.
#
# Optional environment overrides:
#   SLURM_TIME  value passed to `#SBATCH --time` (default: 45; Slurm reads a
#               bare number as minutes)
#   JOB_NAME    Slurm job name (default: install_ocr_v2)
#
# Exit status: 0 when a job ID was obtained, 1 otherwise.

set -euo pipefail

# --- Cluster- and user-specific settings -------------------------------------
BASE="/lustre/fsw/portfolios/datascience/users/rchesler/scratch/nemotron-ocr-v2"
PKG="$BASE/nemotron-ocr"
ACCOUNT="datascience_nemo_retriever"
PARTITIONS="batch_block1,batch_block3,batch_block4"
SLURM_TIME="${SLURM_TIME:-45}"
JOB_NAME="${JOB_NAME:-install_ocr_v2}"

# Fail on the submit host rather than queueing a job that would die at `cd`.
if [[ ! -d "$PKG" ]]; then
  echo " package directory not found: $PKG" >&2
  exit 1
fi

# --- Log directory (shared by every run of this script) ----------------------
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOGS="$BASE/inference_output/install_logs"
mkdir -p "$LOGS"

# --- Generate the batch script -----------------------------------------------
# The heredoc delimiter is deliberately unquoted: every ${...} below is
# expanded now, while the file is being generated. Only \$HOME and \$PATH are
# escaped, so they are resolved later on the compute node.
JOB="$BASE/.job_install_${TIMESTAMP}.sh"
cat > "$JOB" << ENDSCRIPT
#!/bin/bash
#SBATCH --job-name=${JOB_NAME}
#SBATCH --account=${ACCOUNT}
#SBATCH --partition=${PARTITIONS}
#SBATCH --nodes=1
#SBATCH --gpus-per-node=1
#SBATCH --time=${SLURM_TIME}
#SBATCH --output=${LOGS}/install_%j.out
#SBATCH --error=${LOGS}/install_%j.err

set -euo pipefail

# Environment modules are optional: a missing init file or a failed module
# load is tolerated and the job carries on.
source /etc/profile.d/modules.sh 2>/dev/null || true
if command -v module >/dev/null 2>&1; then
  module load cuda12.2/toolkit/12.2.2 || true
fi

# Put user-local binaries first on PATH (presumably where uv is installed).
export PATH="\$HOME/.local/bin:\$PATH"

cd "${PKG}"
uv venv
source .venv/bin/activate

uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128

# Remove previously built extension binaries before the editable install.
# BUILD_CPP_FORCE=1 presumably forces the C++ build (confirm in the package
# build script).
rm -f src/nemotron_ocr_cpp/_nemotron_ocr_cpp*.so
BUILD_CPP_FORCE=1 uv pip install -e . -v

# Smoke test: report the torch/CUDA state and import the installed packages.
python -c "
import torch
print(f'torch={torch.__version__} cuda_available={torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'gpu={torch.cuda.get_device_name(0)}')
import nemotron_ocr, nemotron_ocr_cpp
from nemotron_ocr.inference.pipeline_v2 import NemotronOCRV2
print('nemotron_ocr import OK; nemotron_ocr_cpp OK; NemotronOCRV2 OK')
"
ENDSCRIPT

chmod +x "$JOB"

# --- Summary banner ----------------------------------------------------------
echo "============================================================"
echo " GPU install job (scratch clone)"
echo "============================================================"
echo " package: $PKG"
echo " logs: $LOGS"
echo " job file: $JOB"
echo "============================================================"

# --- Submit ------------------------------------------------------------------
# Capture sbatch's combined output and its exit status separately so that a
# rejected submission can be reported with sbatch's own message instead of
# being silently discarded.
submit_rc=0
submit_out=$(sbatch "$JOB" 2>&1) || submit_rc=$?

# On success sbatch prints "Submitted batch job <id>"; pull the id out of it.
JID=""
job_re='Submitted batch job ([0-9]+)'
if (( submit_rc == 0 )) && [[ "$submit_out" =~ $job_re ]]; then
  JID="${BASH_REMATCH[1]}"
fi

if [[ -n "$JID" ]]; then
  echo " submitted job ID: $JID"
  echo " tail: tail -f $LOGS/install_${JID}.out"
else
  echo " sbatch failed or did not return a job id" >&2
  if [[ -n "$submit_out" ]]; then
    printf '%s\n' "$submit_out" >&2
  fi
  exit 1
fi