Skip to content
Snippets Groups Projects
Commit 36a26385 authored by Xingran Wang
Browse files

generate and submit benchmark script

parent a67d0da7
No related branches found
No related tags found
No related merge requests found
#! /bin/bash
# Slurm batch script template for one pio_write benchmark run.
#
# The script loads the compiler/MPI/I-O modules, prepares an output prefix on
# scratch and launches ./pio_write.parallel through srun.
# Every value written between double curly braces is a template placeholder
# and has to be substituted before the script is handed to sbatch.
#
# Scheduler settings: stdout and stderr are both written to the same
# ./LOG.* file, and one GPU is requested through --gres.
#SBATCH --job-name {{ name_job }}
#SBATCH -t 00:10:00
#SBATCH --nodes {{ num_node }}
#SBATCH --tasks-per-node 48
#SBATCH --partition {{ partition }}
#SBATCH --account highresmonsoon
#SBATCH --output ./LOG.%x.%j.o
#SBATCH --error ./LOG.%x.%j.o
#SBATCH --gres=gpu:1
#SBATCH --parsable
# Abort the job as soon as any command fails.
set -e
# Toolchain versions, interpolated into the module names below.
nvhpc_version=21.5
openmpi_version=4.1.1
# Start from an empty module environment, then load the 2020 stage and the
# toolchain on top of it.
module --force purge
ml use "$OTHERSTAGES"
ml Stages/2020
ml NVHPC/${nvhpc_version}-GCC-10.3.0
ml OpenMPI/${openmpi_version}
ml Ruby/2.7.2
ml UCX/1.10.1
ml netCDF-Fortran
ml CMake/3.18.0
# ecCodes is not directly available, load dependencies instead...
ml OpenGL/2020 libaec
ml ecCodes/2.21.0-nompi
# Replace netCDF/4.7.4-serial with netCDF/4.7.4
# (presumably the MPI-enabled build; TODO confirm).
module swap netCDF/4.7.4-serial netCDF/4.7.4
# Trace every command from here on so the job log shows exactly what ran.
set -x
# Make sure the scratch directory exists, then extend qprefix with the
# file-name prefix that is passed to the benchmark via -qprefix.
qprefix="/p/scratch/highresmonsoon/cdi-pio-test_dir"
mkdir -p "$qprefix"
qprefix="$qprefix/files_pio_write"
# NOTE(review): the preloaded libraries come from UCX 1.11.1 although the
# UCX/1.10.1 module is loaded above; confirm that this mismatch is intended.
export LD_PRELOAD="/p/software/juwelsbooster/stages/2020/software/UCX/1.11.1/lib/libuct.so.0:/p/software/juwelsbooster/stages/2020/software/UCX/1.11.1/lib/libucp.so.0:/p/software/juwelsbooster/stages/2020/software/UCX/1.11.1/lib/libucs.so.0:/p/software/juwelsbooster/stages/2020/software/UCX/1.11.1/lib/libucm.so.0"
# SCT settings handed to the benchmark through the environment; their exact
# meaning is defined outside this script.
export SCT_PROC_CHOICE={{ sct_proc_choice }}
export SCT_CALLSTATS=1
# reduce output data to decrease test duration
# (disabled alternative invocation using pio_write_deco2d.parallel, kept for
# reference)
# time ../libtool --mode=execute srun \
# ./pio_write_deco2d.parallel "-qprefix=$qprefix" -qpio-role-scheme={{ pio_role_scheme }} \
# -c -m 384 -n 192 -z 95 -t 20 -y 60 -s 7 \
# -f grb2 -p PIO_MPI_FW_AT_ALL -w 16
# Active benchmark run: timed, started through the libtool wrapper and srun;
# role scheme, output format and the -w value come from the template.
time ../libtool --mode=execute srun \
./pio_write.parallel "-qprefix=$qprefix" -qpio-role-scheme={{ pio_role_scheme }} \
-c -m 768 -n 384 -z 95 -t 10 -y 120 -s 7 \
-f {{ format }} -p PIO_MPI_FW_AT_ALL -w {{ num_io_task }}
# Stop command tracing.
set +x
from jinja2 import Environment, FileSystemLoader
from subprocess import run, PIPE
# Generate one Slurm job script per (node count, I/O task count) combination
# from the Jinja template and submit each script three times. Every job is
# chained to the previously submitted one with an "afterok" dependency, so
# the benchmark jobs run one after another.

# --- benchmark settings shared by all generated scripts ---------------------
partition = "booster"
compiler = "nvhpc_ompi"
branch_short = "1.8.x-tj20220307"
# pio_role_scheme candidates:
#  - balanced
#  - last
pio_role_scheme = "last"
output_format = "nc4"

# --- values substituted into the job-script template ------------------------
config = dict()
config['partition'] = partition
config['sct_proc_choice'] = "SCT_REDUCE_ALL"
# 'format' candidates: nc2, nc4, grb2, grb, etc.
config['format'] = output_format
# Take the scheme from the variable above so that the rendered script always
# matches the scheme encoded in the job name and the script file name.
config['pio_role_scheme'] = pio_role_scheme

dependency = "--dependency=afterok:"
# ID of the most recently submitted job; 0 means nothing was submitted yet.
job_id = 0

file_loader = FileSystemLoader(".")
env = Environment(loader=file_loader)
template = env.get_template("template_file.sh.jinja")

num_nodes = list(range(5, 11))
num_io_tasks = list(range(8, 56, 8))

for num_node in num_nodes:
    for num_io_task in num_io_tasks:
        config['name_job'] = (f"pio_benchmark.{partition}.{compiler}"
                              f".{branch_short}.num_node-{num_node}"
                              f".num_io_task-{num_io_task}"
                              f".{pio_role_scheme}"
                              f".{output_format}")
        config['num_node'] = num_node
        config['num_io_task'] = num_io_task
        run_script = (f"pio_benchmark.num_node-{num_node}"
                      f".num_io_task-{num_io_task}"
                      f".{pio_role_scheme}.run"
                      f".{output_format}")

        # Leaving the "with" block closes (and thereby flushes) the file
        # before sbatch reads it.
        with open(run_script, mode="w") as f:
            f.write(template.render(**config))

        # Submit the same script three times to get repeated measurements.
        for _ in range(3):
            command = ['sbatch']
            if job_id != 0:
                # Chain onto the previously submitted job.
                command.append(dependency + str(job_id))
            command.append(run_script)
            shell_process = run(command, stdout=PIPE)

            # With --parsable, sbatch prints "jobid" or "jobid;cluster";
            # only the job id part is valid in a dependency specification.
            submitted_id = (shell_process.stdout.decode('UTF-8')
                            .strip().split(';')[0])
            if shell_process.returncode == 0 and submitted_id:
                # Submission succeeded: later jobs depend on this one.
                job_id = submitted_id
                print(f"{run_script} submitted with ID {job_id}.")
            else:
                # Keep the previous job_id so the chain continues from the
                # last job that was actually accepted.
                print(f"failed to submit {run_script}")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment