Switch to unified view

a b/tasks_base/train.pbs.template
1
#!/bin/sh
2
3
### The following requests all resources on 1 DGX-1 node
4
#PBS -l select=1:ncpus=40:ngpus=8:mem=160G
5
6
### The "select=1" specifies the number of nodes
7
### The "ncpus=40:ngpus=8" asks for access to all 8 GPU cards
8
### If you request fewer than 8 GPUs then make the ncpus value
9
###   five times the ngpus value, e.g. select=1:ncpus=5:ngpus=1
10
11
### Specify amount of time required
12
###  values less than 4 hours go into a higher priority queue
13
#PBS -l walltime=23:59:59
14
15
### Specify DGX queue
16
#PBS -q dgx
17
18
### Specify project code
19
### e.g. 41000001 was the pilot project code
20
###      Personal is your personal lifetime allowance
21
### Job will not submit unless this is changed
22
#PBS -P 12001577
23
24
### Specify name for job
25
#PBS -N train_base_{0}
26
27
### Standard output by default goes to file $PBS_JOBNAME.o$PBS_JOBID
28
### Standard error by default goes to file $PBS_JOBNAME.e$PBS_JOBID
29
### To merge standard output and error use the following
30
#PBS -j oe
31
32
### Start of commands to be run
33
34
# Docker image to use for container
35
#   To see available images run command: nscc-docker images
36
#   If image is not present, email help@nscc.sg to request pulling image into repository on all DGX nodes
37
# Image reference is a fixed literal (no expansion needed), hence single quotes
image='nvcr.io/nvidia/pytorch:20.01-py3'
38
39
# Change to directory where job was submitted
40
# Abort with cd's own exit status if the submission directory is unreachable
cd "$PBS_O_WORKDIR" || exit
41
# Please note that when you start a Docker container then inside the container it will start in a different directory
42
# You will also need to change to the correct directory inside the container
43
44
# The "nscc-docker run $image" command runs the following Docker command: 
45
#    nvidia-docker -u $UID:$GID -v /home:/home -v /raid:/raid --rm -i --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 $image /bin/sh
46
# See full list of options with "nscc-docker run -h"
47
# Pass the commands that you wish to run inside the container on the standard input
48
# Edit the file redirected to standard input below (script.{0}.sh) as required
49
# Run the container, feeding it the commands in script.{0}.sh on standard input.
# "$image" is quoted so the image reference is always passed as one argument.
nscc-docker run "$image" < script.{0}.sh # > stdout.$PBS_JOBID 2> stderr.$PBS_JOBID