Difference between revisions of "SLURM"
Line 235: | Line 235: | ||
echo "FINKI FCC" | echo "FINKI FCC" | ||
'''<h1 id="Slurm_Check">Checking the status of the job</h1>''' |
Revision as of 11:18, 30 August 2021
Initiate and manage SLURM tasks
Contents |
---|
Most used parameters:
Parameters | Description |
---|---|
#SBATCH --ntasks-per-node=2 | # Number of tasks to launch on each allocated node |
#SBATCH --time=1:00:00 | # Maximum wall-clock time for the job (days-hrs:min:sec) |
#SBATCH --job-name=test_job | # Job name |
#SBATCH --mem=1G | # RAM allocated to the job (e.g. 1G, 2G, 4G) |
#SBATCH --error=testerror_%j.error | # Print the errors that occur when executing the job |
#SBATCH --cpus-per-task=1 | # Number of processors required for a single task |
#SBATCH --output=testoutput_%j.out | # Print the results from scripts and the values it returns |
#SBATCH --gres=gpu:2 | # Number of GPU cards per node allocated for the job |
#SBATCH --nodelist=cuda4 | # Executing on specific nodes, e.g. cuda4 is for executing only on cuda4 host |
# Put the shared Anaconda installation first on PATH so its tools are preferred.
export PATH="/opt/anaconda3/bin:$PATH"
# Load conda's shell integration so `conda activate` works inside this script.
source /opt/anaconda3/etc/profile.d/conda.sh
# Create a Python 3.8 environment named "virtualenv".
# NOTE(review): `conda create` prompts for confirmation and may block or fail
# in a non-interactive batch job, or if the env already exists — consider
# creating the environment once beforehand, or adding `-y`.
conda create -n virtualenv python=3.8
conda activate virtualenv
# Simple payload command demonstrating that the job ran.
echo "FINKI FCC"
Example by executing a simple script
#!/bin/bash
# Minimal SLURM batch script: one task per node, 1-hour time limit,
# stdout/stderr captured per job (%j expands to the SLURM job id).
#SBATCH --job-name=test_job
#SBATCH --time=1:00:00
# FIX: the original line began the option with a Unicode en-dash
# ("–-ntasks-per-node"), which sbatch does not recognize and silently
# ignores; the option must start with two ASCII hyphens.
#SBATCH --ntasks-per-node=1
#SBATCH --error=testerror_%j.error
#SBATCH --output=testoutput_%j.out
# Put the shared Anaconda install first on PATH and enable `conda activate`.
export PATH="/opt/anaconda3/bin:$PATH"
source /opt/anaconda3/etc/profile.d/conda.sh
# NOTE(review): `conda create` prompts for confirmation and may block in a
# batch job, or fail if the env already exists — consider `conda create -y`
# or creating the environment once before submitting.
conda create -n virtualenv python=3.8
conda activate virtualenv
# Simple payload command demonstrating that the job ran.
echo "FINKI FCC"
The script is executed via sbatch <scriptname>.sh
GPU memory selection options
There are 4 options for selecting GPU memory; choose one by combining the `--gres` and `--nodelist` directives in the script as shown below.
GPU Memory | Code for the script |
---|---|
16 GB GDDR6 | #SBATCH --gres=gpu:1
#SBATCH --nodelist=cuda1 (or cuda2 or cuda3) |
32 GB GDDR6 | #SBATCH --gres=gpu:2
#SBATCH --nodelist=cuda1 (or cuda2 or cuda3) |
48 GB GDDR6 | #SBATCH --gres=gpu:1
#SBATCH --nodelist=cuda4 |
96 GB GDDR6 | #SBATCH --gres=gpu:2
#SBATCH --nodelist=cuda4 |
Examples with GPU memory selection
Example with 16 GB GPU:
#!/bin/bash
# SLURM batch script requesting 16 GB of GPU memory:
# one GPU (--gres=gpu:1) on host cuda1.
#SBATCH --ntasks-per-node=2
#SBATCH --time=1:00:00
#SBATCH --job-name=test_job
#SBATCH --mem=1G
#SBATCH --error=testerror_%j.error
#SBATCH --cpus-per-task=1
#SBATCH --output=testoutput_%j.out
# One GPU card; pinned to node cuda1 (cuda2/cuda3 are equivalent per the table above).
#SBATCH --gres=gpu:1
#SBATCH --nodelist=cuda1
# Prefer the shared Anaconda install and enable `conda activate`.
export PATH="/opt/anaconda3/bin:$PATH"
source /opt/anaconda3/etc/profile.d/conda.sh
# NOTE(review): `conda create` may prompt/fail non-interactively or when the
# env already exists — consider creating the env once beforehand, or `-y`.
conda create -n virtualenv python=3.8
conda activate virtualenv
# Simple payload command demonstrating that the job ran.
echo "FINKI FCC"
Example with 32 GB GPU:
#!/bin/bash
# SLURM batch script requesting 32 GB of GPU memory:
# two GPUs (--gres=gpu:2) on host cuda1.
#SBATCH --ntasks-per-node=2
#SBATCH --time=1:00:00
#SBATCH --job-name=test_job
#SBATCH --mem=1G
#SBATCH --error=testerror_%j.error
#SBATCH --cpus-per-task=1
#SBATCH --output=testoutput_%j.out
# Two GPU cards; pinned to node cuda1 (cuda2/cuda3 are equivalent per the table above).
#SBATCH --gres=gpu:2
#SBATCH --nodelist=cuda1
# Prefer the shared Anaconda install and enable `conda activate`.
export PATH="/opt/anaconda3/bin:$PATH"
source /opt/anaconda3/etc/profile.d/conda.sh
# NOTE(review): `conda create` may prompt/fail non-interactively or when the
# env already exists — consider creating the env once beforehand, or `-y`.
conda create -n virtualenv python=3.8
conda activate virtualenv
# Simple payload command demonstrating that the job ran.
echo "FINKI FCC"
Example with 48 GB GPU:
#!/bin/bash
# SLURM batch script requesting 48 GB of GPU memory:
# one GPU (--gres=gpu:1) on host cuda4.
#SBATCH --ntasks-per-node=2
#SBATCH --time=1:00:00
#SBATCH --job-name=test_job
#SBATCH --mem=1G
#SBATCH --error=testerror_%j.error
#SBATCH --cpus-per-task=1
#SBATCH --output=testoutput_%j.out
# One GPU card on node cuda4 (the larger-memory host per the table above).
#SBATCH --gres=gpu:1
#SBATCH --nodelist=cuda4
# Prefer the shared Anaconda install and enable `conda activate`.
export PATH="/opt/anaconda3/bin:$PATH"
source /opt/anaconda3/etc/profile.d/conda.sh
# NOTE(review): `conda create` may prompt/fail non-interactively or when the
# env already exists — consider creating the env once beforehand, or `-y`.
conda create -n virtualenv python=3.8
conda activate virtualenv
# Simple payload command demonstrating that the job ran.
echo "FINKI FCC"
Example with 96 GB GPU:
#!/bin/bash
# SLURM batch script requesting 96 GB of GPU memory:
# two GPUs (--gres=gpu:2) on host cuda4.
#SBATCH --ntasks-per-node=2
#SBATCH --time=1:00:00
#SBATCH --job-name=test_job
#SBATCH --mem=1G
#SBATCH --error=testerror_%j.error
#SBATCH --cpus-per-task=1
#SBATCH --output=testoutput_%j.out
# Two GPU cards on node cuda4 (the larger-memory host per the table above).
#SBATCH --gres=gpu:2
#SBATCH --nodelist=cuda4
# Prefer the shared Anaconda install and enable `conda activate`.
export PATH="/opt/anaconda3/bin:$PATH"
source /opt/anaconda3/etc/profile.d/conda.sh
# NOTE(review): `conda create` may prompt/fail non-interactively or when the
# env already exists — consider creating the env once beforehand, or `-y`.
conda create -n virtualenv python=3.8
conda activate virtualenv
# Simple payload command demonstrating that the job ran.
echo "FINKI FCC"