Diff of /tools/slurm_test.sh [000000] .. [4e96d3]

Switch to unified view

a b/tools/slurm_test.sh
1
#!/usr/bin/env bash
#
# Launch tools/test.py on a Slurm partition through srun.
#
# Usage:
#   [GPUS=N] [GPUS_PER_NODE=N] [CPUS_PER_TASK=N] [SRUN_ARGS="..."] \
#     slurm_test.sh PARTITION JOB_NAME CONFIG CHECKPOINT [PY_ARGS...]
#
# Positional arguments:
#   PARTITION   passed to `srun -p`
#   JOB_NAME    passed to `srun --job-name`
#   CONFIG      first positional argument of tools/test.py
#   CHECKPOINT  second positional argument of tools/test.py
#   PY_ARGS...  any remaining arguments, forwarded verbatim to tools/test.py
#
# Environment variables (defaults in parentheses):
#   GPUS           (4)  value for `srun --ntasks`
#   GPUS_PER_NODE  (4)  value for `srun --gres=gpu:` and `--ntasks-per-node`
#   CPUS_PER_TASK  (5)  value for `srun --cpus-per-task`
#   SRUN_ARGS      ("") extra srun options, split on whitespace
#
# Note: tools/test.py is given as a relative path, so it is resolved against
# the current working directory, not against the location of this script.

# Fail early with a usage message instead of handing srun empty values.
if (( $# < 4 )); then
  printf 'Usage: %s PARTITION JOB_NAME CONFIG CHECKPOINT [PY_ARGS...]\n' "$0" >&2
  exit 2
fi

# Trace every command so the final srun invocation shows up in the job log.
set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
CHECKPOINT=$4
GPUS=${GPUS:-4}
GPUS_PER_NODE=${GPUS_PER_NODE:-4}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
# Keep the extra arguments in an array so values containing spaces or glob
# characters reach python exactly as the caller wrote them.
PY_ARGS=("${@:5}")
SRUN_ARGS=${SRUN_ARGS:-""}

# Prepend the parent directory of this script to PYTHONPATH for the srun
# command only (the assignment prefix does not leak into the calling shell).
# SRUN_ARGS is intentionally left unquoted: it is a flat string of options
# that must be word-split into separate srun arguments.
# shellcheck disable=SC2086
PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH:-}" \
srun -p "${PARTITION}" \
    --job-name="${JOB_NAME}" \
    --gres=gpu:"${GPUS_PER_NODE}" \
    --ntasks="${GPUS}" \
    --ntasks-per-node="${GPUS_PER_NODE}" \
    --cpus-per-task="${CPUS_PER_TASK}" \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/test.py "${CONFIG}" "${CHECKPOINT}" --launcher="slurm" "${PY_ARGS[@]}"