Compare revisions
--- a/README.md
+++ b/README.md
# SSH scripts for connecting to mistral.dkrz.de
# SSH scripts for connecting to levante.dkrz.de
These scripts help you to establish a connection to vncserver or
Jupyter Notebook running on a mistral node. The scripts handle job submission,
Jupyter Notebook running on a levante node. The scripts handle job submission,
tunnel set-up, and start the client application on your local system.
As bash scripts, they run natively on Linux, macOS, and other Unix-like
systems. After installing WSL (Windows Subsystem for Linux), they also run
on Microsoft Windows 10.
\ No newline at end of file
on Microsoft Windows 10.
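For illustration, typical invocations might look like this (the account code, partition, task count, and runtime below are placeholders; check each script's usage output for the authoritative option list):

```bash
# VNC session in a Slurm job; the -b options are passed to sbatch
./start-vnc -b "-A bk0555 -p interactive -n 4"

# Jupyter Notebook in a Slurm job with 4 tasks and a 60-minute runtime
./start-jupyter -A bk0555 -p interactive -n 4 -t 60
```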
--- a/start-jupyter
+++ b/start-jupyter
@@ -41,7 +41,7 @@
# jupyter in a job on dedicated resources. Otherwise jupyter uses a
# shared interactive node.
#
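# For example (the account code and task count are placeholders):
#   start-jupyter                    # jupyter on the shared node
#   start-jupyter -A bk0555 -n 4     # jupyter in a dedicated job
#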
# In case of problems contact Mathis Rosenhauer <rosenhauer@dkrz.de>.
# In case of problems contact support@dkrz.de.
#
set -eufo pipefail
@@ -91,8 +91,13 @@ SJ_INCFILE=""
#
# Must be directly accessible from client. The frontend and the node
# where jupyter is running need a shared home file system.
readonly SJ_FRONTEND_HOST="mistralpp.dkrz.de"
readonly SJ_FRONTEND_HOST="levante.dkrz.de"
# Image
#
# Start the Jupyter Notebook in a Singularity container created from
# the given image.
SJ_IMAGE=""
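# For example (the path is purely illustrative):
# SJ_IMAGE="${HOME}/containers/jupyter.sif"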
function clean_up () {
trap - ERR EXIT
@@ -134,6 +139,7 @@ Available values for OPTION include:
-i file source file prior to running jupyter
-n ntasks request ntasks tasks for job
-p partition run job on partition
-s image singularity image
-t time job runtime
-u username use username for login
@@ -142,7 +148,7 @@ EOF
function parse_options () {
local option
while getopts 'A:c:di:n:p:t:u:' option; do
while getopts 'A:c:di:n:p:s:t:u:' option; do
case ${option} in
A) SJ_ACCTCODE="$OPTARG"
;;
@@ -156,6 +162,8 @@ function parse_options () {
;;
p) SJ_PARTITION="$OPTARG"
;;
s) SJ_IMAGE="$OPTARG"
;;
t) SJ_RUNTIME="$OPTARG"
;;
u) SJ_USERNAME="$OPTARG"
@@ -173,6 +181,7 @@ function parse_options () {
readonly SJ_PARTITION
readonly SJ_RUNTIME
readonly SJ_USERNAME
readonly SJ_IMAGE
}
function ssh_frontend () {
@@ -196,7 +205,7 @@ function source_incfile() {
fi
commandline="source ${incfile}; ${commandline}"
else
commandline="module load python3/unstable; ${commandline}"
commandline="module load python3; ${commandline}"
fi
echo "${commandline}"
}
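# Sketch of the effect (the jupyter command itself is assembled
# elsewhere and only illustrative here): with '-i ~/my_env.sh' the
# command line becomes
#   source ~/my_env.sh; jupyter notebook --no-browser ...
# and without -i it becomes
#   module load python3; jupyter notebook --no-browser ...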
@@ -216,6 +225,9 @@ function assemble_commandline () {
if [[ -z ${SJ_ACCTCODE} ]]; then
commandline="nohup ${commandline} > /dev/null & echo \$!"
fi
if [[ -n ${SJ_IMAGE} ]]; then
commandline="singularity exec ${SJ_IMAGE} ${commandline}"
fi
commandline="$(source_incfile "${commandline}")"
echo "${commandline}"
}
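# Sketch of a fully assembled command line with -i and -s set (the
# jupyter invocation is illustrative):
#   source my_env.sh; singularity exec jupyter.sif jupyter notebook ...
# Without an account code, the inner command is additionally wrapped as
# 'nohup ... & echo $!' so it runs on the shared node and its PID is
# reported back.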
@@ -233,7 +245,6 @@ function submit_jupyter_job () {
#SBATCH --account=${SJ_ACCTCODE}
#SBATCH --output=/dev/null
#SBATCH --parsable
#SBATCH --dkrzepilog=0
cd \${HOME}
echo "NODE:\${SLURM_JOB_NODELIST}" > ${logfile}
${commandline}
@@ -280,6 +291,16 @@ function get_jupyter_node () {
fi
}
function get_jumphost_options () {
local node="$1"
# Check for -J option introduced with OpenSSH 7.3
if (ssh || true) 2>&1 | grep -q -- "-J" ; then
echo "-J ${SJ_USERNAME}@${SJ_FRONTEND_HOST}"
else
echo "-o ProxyCommand=ssh ${SJ_USERNAME}@${SJ_FRONTEND_HOST} nc ${node} 22"
fi
}
function open_tunnel () {
local node="$1"
local port="$2"
@@ -288,20 +309,12 @@ function open_tunnel () {
# Tunnel to a notebook in a job needs a jump host since nodes
# usually have no direct external access. Unfortunately, ssh
# doesn't seem to support connection sharing for the jump host.
if ! ssh -o ForwardX11=no \
-o StrictHostKeyChecking=accept-new \
-J "${SJ_USERNAME}@${SJ_FRONTEND_HOST}" \
-L "${port}:localhost:${port}" \
-Nf \
"${SJ_USERNAME}@${node}"; then
echo "Please consider updating your local ssh client!"
ssh -o ForwardX11=no \
-o StrictHostKeyChecking=ask \
-o ProxyCommand="ssh ${SJ_USERNAME}@${SJ_FRONTEND_HOST} nc ${node} 22" \
-L "${port}:localhost:${port}" \
-Nf \
"${SJ_USERNAME}@${node}"
fi
ssh -o ForwardX11=no \
-o StrictHostKeyChecking=accept-new \
"$(get_jumphost_options ${node})" \
-L "${port}:localhost:${port}" \
-Nf \
"${SJ_USERNAME}@${node}"
else
ssh_frontend "" "-O forward -L${port}:localhost:${port}"
fi
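# With the -J form, the tunnel command effectively becomes, e.g.
# (username, node, and port are illustrative):
#   ssh -o ForwardX11=no -J k123456@levante.dkrz.de \
#       -L 8888:localhost:8888 -Nf k123456@l40123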
--- a/start-vnc
+++ b/start-vnc
#!/bin/bash
#
# Copyright 2019 Deutsches Klimarechenzentrum GmbH
# Copyright 2022 Deutsches Klimarechenzentrum GmbH
#
# All rights reserved.
#
@@ -32,17 +32,18 @@
# start-vnc
#
# This script is intended to be used on your local workstation running
# Ubuntu or Fedora Linux or macOS (tested). Other Unix flavors may
# work as well. It allows you to connect to one of DKRZ's
# visualization nodes to work remotely on your visualizations.
# Ubuntu or Fedora Linux or macOS (tested). Other Unix flavors,
# including WSL on Microsoft Windows, may work as well. The script
# allows you to connect to one of DKRZ's HPC nodes to work in a
# desktop environment on that node.
#
# Technically, it starts a VNC server on one of the GPU nodes by
# Technically, it starts a VNC server on one of the nodes by
# submitting a generated job script. A local vncviewer client is used
# to connect to the server over an encypted ssh tunnel. After the
# to connect to the server over an encrypted ssh tunnel. After the
# client shuts down, the server job is terminated. TigerVNC, TightVNC,
# and TurboVNC are known to work.
#
# In case of problems contact Mathis Rosenhauer <rosenhauer@dkrz.de>.
# In case of problems, contact support@dkrz.de.
#
set -eufo pipefail
@@ -50,13 +51,22 @@ set -eufo pipefail
# Default settings
#
# You can change the settings here or override them with command line
# options. SVNC_ACCTCODE is the only parameter you have to provide if
# your local username matches your username on the frontend and if
# vncviewer is installed in the search path (tight/tigervnc on Linux
# for example).
# options. SVNC_SBATCH_OPTIONS is the only parameter you have to
# provide if your local username matches your username on the frontend
# and if vncviewer is installed in the search path (Tight/TigerVNC on
# Linux for example).
# Project account code
SVNC_ACCTCODE=""
# These options get passed to sbatch(1). Here you have to specify your
# account code and resource requirements.
SVNC_SBATCH_OPTIONS=""
# SVNC_SBATCH_OPTIONS="-A bk0555 -p interactive -n 4"
#
# This example would start a VNC session on the interactive partition
# with two physical (4 SMT) cores and 7680 MB of RAM (default). You
# have to fill in your project's account code (-A option) and adapt
# the options to your needs, in particular if you need more memory.
# LDAP username
#
@@ -66,50 +76,34 @@ SVNC_USERNAME="$(id -un)"
# Path to local vncviewer
#
# If your local vnc client is not in the search path or named
# differently (e.g. TurboVNC, macOS), then change this parameter.
# If your local VNC client is not in the search path or named
# differently (e.g. TurboVNC on macOS), then change this parameter.
SVNC_CLIENT="vncviewer"
# Server options
# VNC Server options
#
# More options for the vncserver. TurboVNC on A Mac may produce
# a 'javax.net.ssl.SSLHandshakeException' in this case adding
# ' -securitytypes none' to SVNC_SERVER_OPTIONS may help.
# More options for the vncserver. TurboVNC on a Mac may produce a
# 'javax.net.ssl.SSLHandshakeException'. In this case, adding '
# -securitytypes none' to SVNC_SERVER_OPTIONS may help.
SVNC_SERVER_OPTIONS="-geometry 1920x1200"
# Session run time in minutes
SVNC_RUNTIME=240
# Number of GPU nodes reserved. Default is the special value "half"
# which allocates 24 cores of a node. To allocate one or more nodes,
# set to the integer number of nodes.
SVNC_NODES="half"
# Quality of service
SVNC_QOS="normal"
# Constraint for feature selection (GPU and/or memory)
SVNC_CONSTRAINT=""
# Uncomment this if you use TurboVNC as vncviewer.
#
# TurboVNC will work without this option but then it will use its own
# implementation of ssh which dosen't support public key auth among other
# things.
# implementation of ssh which doesn't support public key
# authentication among other things.
#
# readonly SVNC_CLIENT_OPTIONS="-extssh"
# SVNC_CLIENT_OPTIONS="-extssh"
# Frontend host
#
# Must be directly accessible from client. The frontend and the node
# where vncserver is running need a shared home file system. You will
# have to change this for other sites than DKRZ (along with many other
# things).
readonly SVNC_FRONTEND_HOST="mistral.dkrz.de"
# Must be directly accessible from client. The front end and the node
# where vncserver is running need a shared home file system.
readonly SVNC_FRONTEND_HOST="levante.dkrz.de"
# Copy vncpassword temporarily to the local workstation
# Copy VNC password temporarily to the local workstation
#
# If your vncviewer client has trouble with TurboVNC's password format
# If your vncviewer client has trouble with TurboVNC's password format,
# then disable this option (set to "false" or comment).
readonly SVNC_PASSWORD=true
@@ -122,28 +116,30 @@ clean_up () {
set +e
if [[ "${SVNC_KILLSERVER}" = true ]]; then
if [[ -n ${vnc_host:-} ]]; then
echo "Killing vncserver :${vnc_display} on ${vnc_host}."
ssh_frontend "ssh ${vnc_host} \"/opt/TurboVNC/bin/vncserver \
-kill :${vnc_display}\""
if [[ -n ${SVNC_HOST:-} ]]; then
echo "Killing vncserver :${SVNC_DISPLAY} on ${SVNC_HOST}."
ssh -o StrictHostKeyChecking=ask \
"$(get_jumphost_options)" \
"${SVNC_USERNAME}@${SVNC_HOST}" \
"/opt/TurboVNC/bin/vncserver -kill :${SVNC_DISPLAY}"
fi
if [[ -n ${job_id:-} ]]; then
echo "Removing job ${job_id}."
ssh_frontend "scancel -Q ${job_id}; sleep 1; \
rm -f .startvnc/out.${job_id}"
if [[ -n ${SVNC_JOB_ID:-} ]]; then
echo "Removing job ${SVNC_JOB_ID}."
ssh_frontend "scancel -Q ${SVNC_JOB_ID}; sleep 1; \
rm -f .startvnc/out.${SVNC_JOB_ID}"
else
echo "Job ID not available. Make sure the vncjob is not running!"
echo "Job ID not known. You may have to kill the job manually."
ssh_frontend "squeue -u ${SVNC_USERNAME}"
fi
else
echo "Kept job ${job_id} running."
echo "Kept job ${SVNC_JOB_ID} running."
fi
ssh_frontend "" "-O exit"
rmdir "${ssh_socket_dir}"
rmdir "${SVNC_SOCKET_DIR}"
# Remove local vnc PasswordFile
# Remove local VNC password file
if [[ "${SVNC_PASSWORD}" = true ]]; then
rm -f "vnc_passwd"
fi
@@ -156,165 +152,172 @@ Usage: $(basename "$0") [OPTION]
Available values for OPTION include:
-A acctcode acctcode of job
-C list constraint for feature selection
-n nodes number of nodes
-q qos job qos
-s options addidtional options for vncserver
-t time job runtime
-u username use username for login
-v path path to vncviewer program
-x options addidtional options for vncviewer
-b options options passed to sbatch
-s options additional options for vncserver
-u username use username for login
-v path path to vncviewer program
-x options additional options for vncviewer
EXAMPLE
start-vnc -b "-A bk0555 -p interactive -n 4"
This would start a VNC session on the interactive partition with two
physical (4 SMT) cores. You have to fill in your project's account
code (-A option) and adapt the options to your needs, in particular
if you need more memory.
EOF
}
ssh_frontend () {
local command="$1"
local extra_options="${2:-}"
local options
options="${extra_options} -o ForwardX11=no \
-o ControlPath=${ssh_socket_dir}/control:%h:%p:%r"
local options="${2:-}"
options+=" -o ForwardAgent=no -o ForwardX11=no \
-o ControlPath=${SVNC_SOCKET_DIR}/control:%h:%p:%r"
ssh ${options} "${SVNC_USERNAME}@${SVNC_FRONTEND_HOST}" "${command}"
}
parse_options () {
local option
while getopts 'A:C:n:q:s:t:u:v:x:' option; do
while getopts 'b:s:u:v:x:?h' option; do
case ${option} in
A) SVNC_ACCTCODE="$OPTARG"
;;
C) SVNC_CONSTRAINT="$OPTARG"
;;
n) SVNC_NODES="$OPTARG"
;;
q) SVNC_QOS="$OPTARG"
b) SVNC_SBATCH_OPTIONS="$OPTARG"
;;
s) SVNC_SERVER_OPTIONS="$OPTARG"
;;
t) SVNC_RUNTIME="$OPTARG"
;;
u) SVNC_USERNAME="$OPTARG"
;;
v) SVNC_CLIENT="$OPTARG"
;;
x) SVNC_CLIENT_OPTIONS="$OPTARG"
;;
?) usage
h|?) usage
exit 1
;;
esac
done
readonly SVNC_ACCTCODE
readonly SVNC_CONSTRAINT
readonly SVNC_SBATCH_OPTIONS
readonly SVNC_SERVER_OPTIONS
readonly SVNC_QOS
readonly SVNC_RUNTIME
readonly SVNC_USERNAME
readonly SVNC_CLIENT
readonly SVNC_CLIENT_OPTIONS
if [[ -z ${SVNC_ACCTCODE} ]]; then
printf "ERROR: Please specify an acctcode.\n\n" >&2
if [[ -z ${SVNC_SBATCH_OPTIONS} ]]; then
echo "No sbatch options provided. Set -b option or edit script."
usage
exit 1
fi
}
prepare_vncserver () {
ssh_frontend "mkdir -p .startvnc"
vnc_password_check () {
if ! ssh_frontend "test -s .vnc/passwd"; then
echo "No VNC password found. Please set now."
echo "Do not use your LDAP password. Eight characters maximum."
ssh_frontend "salloc -Q -n1 -pgpu -A${SVNC_ACCTCODE} -- /bin/bash -c \
'ssh -tt \$SLURM_JOB_NODELIST -- mkdir -p .vnc \
&& /opt/TurboVNC/bin/vncpasswd'" "-t"
cat <<EOF
Set your VNC password now.
Do not use your LDAP password. Eight characters maximum.
Wait for the prompt.
EOF
ssh_frontend "salloc -Q ${SVNC_SBATCH_OPTIONS} \
srun -n1 --pty /opt/TurboVNC/bin/vncpasswd" "-t"
fi
}
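# Note that the password is set inside a short interactive allocation,
# so the vncpasswd prompt only appears once the Slurm job has started.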
submit_vnc_job () {
local sbatch_resources
if [[ ${SVNC_NODES} = "half" ]]; then
sbatch_resources="#SBATCH --ntasks=24"
else
sbatch_resources="#SBATCH --nodes=${SVNC_NODES}"
sbatch_resources+=$'\n#SBATCH --exclusive'
fi
ssh_frontend "cd .startvnc && sbatch" <<EOF
ssh_frontend "mkdir -p .startvnc && sbatch ${SVNC_SBATCH_OPTIONS}" <<EOF
#!/bin/bash -l
#SBATCH --job-name=vncserver
#SBATCH --partition=gpu
#SBATCH --constraint=${SVNC_CONSTRAINT}
${sbatch_resources}
#SBATCH --qos=${SVNC_QOS}
#SBATCH --time=${SVNC_RUNTIME}
#SBATCH --account=${SVNC_ACCTCODE}
#SBATCH --output=out.%j
#SBATCH --output=.startvnc/out.%j
#SBATCH --parsable
#SBATCH --dkrzepilog=0
cd \${HOME}
/opt/TurboVNC/bin/vncserver -fg -localhost ${SVNC_SERVER_OPTIONS}
/opt/TurboVNC/bin/vncserver -fg -localhost -wm /sw/bin/vncsession ${SVNC_SERVER_OPTIONS}
EOF
}
get_vnc_host_and_display () {
local job_id="$1"
# Extract host name and display number from the server output.
# Print a heartbeat every second so this script fails if the
# ssh connection to the main script breaks down.
ssh_frontend "/bin/bash -s" <<EOF
logfile=\${HOME}/.startvnc/out.${job_id}
logfile=\${HOME}/.startvnc/out.${SVNC_JOB_ID}
while [[ -z \${host_and_display} ]]; do
sleep 1
if [[ -f \${logfile} ]]; then
host_and_display="\$(grep -Po "started on display \Kmg[0-9]+:[0-9]+" \
\${logfile})"
host_and_display="\$(grep -Po "started on display \K[\w\.]+:[0-9]+" \${logfile})"
fi
printf "." >&2
done
printf "\n" >&2
echo \${host_and_display}
echo "SUCCESS:\${host_and_display}"
EOF
}
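# The grep pattern matches TurboVNC's startup message, which looks
# roughly like this (host and display number are illustrative):
#   Desktop 'TurboVNC: l40123:1 (k123456)' started on display l40123:1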
get_jumphost_options () {
# Check for -J option introduced with OpenSSH 7.3
if (ssh || true) 2>&1 | grep -q -- "-J" ; then
echo "-J ${SVNC_USERNAME}@${SVNC_FRONTEND_HOST}"
else
echo "-o ProxyCommand=ssh ${SVNC_USERNAME}@${SVNC_FRONTEND_HOST} nc ${SVNC_HOST} 22"
fi
}
main () {
parse_options "$@"
trap clean_up INT QUIT TERM ERR EXIT
# Create ssh socket dir
mkdir -p "${HOME}/.ssh"
ssh_socket_dir="$(mktemp -d "${HOME}/.ssh/socket.XXXXX")"
SVNC_SOCKET_DIR="$(mktemp -d "${HOME}/.ssh/socket.XXXXX")"
# Connection sharing set-up
ssh_frontend "" "-MNf"
prepare_vncserver
echo "Submitting vncserver job."
job_id="$(submit_vnc_job)"
# Check for password and set if missing
vnc_password_check
printf "Waiting for job ${job_id} to start" >&2
local host_and_display
host_and_display="$(get_vnc_host_and_display "$job_id")"
vnc_host=${host_and_display%:*}
vnc_display=${host_and_display#*:}
echo "Vncserver started on node ${vnc_host}.dkrz.de display \
:${vnc_display}."
# Submit server job
echo "Submitting vncserver job."
SVNC_JOB_ID="$(submit_vnc_job)"
# Get host and display once job started
printf "Waiting for job ${SVNC_JOB_ID} to start" >&2
local saved_IFS=${IFS}
local status
IFS=: read -r status SVNC_HOST SVNC_DISPLAY <<< "$(IFS=${saved_IFS} get_vnc_host_and_display)"
if [[ "${status}" != "SUCCESS" ]]; then
printf "\nCould not start VNC server\n" >&2
exit 1
fi
echo "Vncserver started on node ${SVNC_HOST} display :${SVNC_DISPLAY}."
# Establish tunnel
#
# WSL doesn't seem to detect used ports so randomize
# local port to reduce risk of masking.
local vnc_port_local=$((5900 + RANDOM % 100))
local vnc_port_remote=$(( 5900 + vnc_display ))
local vnc_port_remote=$(( 5900 + SVNC_DISPLAY ))
local jumphost_options="$(get_jumphost_options)"
until ssh -o ForwardX11=no \
-o StrictHostKeyChecking=ask \
"${jumphost_options}" \
-L "${vnc_port_local}:localhost:${vnc_port_remote}" \
-Nf \
"${SVNC_USERNAME}@${vnc_host}.dkrz.de"; do
"${SVNC_USERNAME}@${SVNC_HOST}"; do
vnc_port_local=$((5900 + RANDOM % 100))
echo "Trying local port ${vnc_port_local}."
done
local client_options
client_options="${SVNC_CLIENT_OPTIONS:-}"
# Build client options including password
local client_options="${SVNC_CLIENT_OPTIONS:-}"
if [[ "${SVNC_PASSWORD}" = true ]]; then
echo "Fetching password from frontend."
ssh_frontend "cat .vnc/passwd" > vnc_passwd
client_options+=" -passwd vnc_passwd"
fi
echo "Connecting vncviewer to ${vnc_host}.dkrz.de"
# Start client
echo "Connecting vncviewer to ${SVNC_HOST}"
"${SVNC_CLIENT}" ${client_options} :$(( vnc_port_local - 5900 ))
}