-
Lukas Werner authoredLukas Werner authored
run.sh 4.33 KiB
#!/bin/bash
# Shell options: abort on command errors, on unset variables and on failing
# pipeline stages; disable pathname expansion; let command substitutions
# inherit errexit.
set -o errexit -o nounset -o noglob -o pipefail
shopt -s inherit_errexit

# Upper bound applied to the Slurm time limit of a CI job.
MAX_CI_JOB_RUNTIME=120

# ANSI escape sequences for coloured output (interpreted by `echo -e`).
TXT_RED="\e[31m"
TXT_BLUE="\e[94m"
TXT_GREEN="\e[32m"
TXT_CLEAR="\e[0m"
TXT_BOLD="\e[1m"

# Verify that every required external tool can be found in PATH; `hash` fails
# for a missing one, which aborts the script because errexit is active.
for REQUIRED_TOOL in awk chown diff getent id runuser salloc sed srun ssh-keygen sinfo; do
  hash "$REQUIRED_TOOL"
done
unset REQUIRED_TOOL
#######################################
# Print an error message in bold red on stderr and return a failure status.
# Under `set -e` the non-zero return aborts the script when the call is the
# last command of its list (e.g. `check || error "..."`).
# Globals:   TXT_RED, TXT_BOLD, TXT_CLEAR (read)
#            BUILD_FAILURE_EXIT_CODE (read, optional)
# Arguments: $1 - message to print
#            $2 - status to return (optional; defaults to
#                 $BUILD_FAILURE_EXIT_CODE, or 1 if that is unset as well)
# Outputs:   the message on stderr
# Returns:   the selected status
#######################################
function error {
  # A local is used so that the status chosen by one call (or an unrelated RV
  # inherited from the environment) cannot leak into later calls.
  local rv=${2:-${BUILD_FAILURE_EXIT_CODE:-1}}
  echo -e "${TXT_RED}${TXT_BOLD}$1${TXT_CLEAR}" >&2
  return "$rv"
}
## User authentication
# The ${VAR:-} form keeps `set -u` from aborting with a bare "unbound variable"
# message before the descriptive error can be printed.
[[ -n "${CUSTOM_ENV_AUTH_USER:-}" ]] || error "AUTH_USER CI/CD variable has not been set."
[[ -n "${CUSTOM_ENV_AUTH_KEY:-}" ]] || error "AUTH_KEY secret CI/CD variable has not been set."
AUTH_USER=$CUSTOM_ENV_AUTH_USER
## Check if the user exists and the validity of its ID
id -u "$AUTH_USER" >/dev/null 2>&1 || error "User $AUTH_USER does not exist"
(( $(id -u "$AUTH_USER") >= 1000 )) || error "User $AUTH_USER ID within system reserved range."
## Use a key pair to authenticate the user (private key has to be set as a GitLab CI/CD variable)
AUTH_KEY=$CUSTOM_ENV_AUTH_KEY
AUTH_PUB=/etc/gitlab-runner/authorized_keys
# The subshell exits 0 as soon as one line of $AUTH_PUB converts to the same
# exported public key as $AUTH_KEY, and 1 otherwise (including when $AUTH_PUB
# cannot be read).
(
  # Convert the supplied key once. It must yield non-empty output: comparing
  # two failed (empty) conversions would otherwise count as a match and let an
  # invalid key authenticate against any unparsable line of $AUTH_PUB.
  WANTED_KEY=$(ssh-keygen -y -e -f /dev/stdin <<< "$AUTH_KEY") || exit 1
  [[ -n "$WANTED_KEY" ]] || exit 1
  # `|| [[ -n "$PUB" ]]` also processes a last line without trailing newline.
  while read -r PUB || [[ -n "$PUB" ]]; do
    # Lines that are not valid keys (blank lines, comments) are skipped.
    CANDIDATE_KEY=$(ssh-keygen -y -e -f /dev/stdin <<< "$PUB") || continue
    [[ -n "$CANDIDATE_KEY" && "$CANDIDATE_KEY" == "$WANTED_KEY" ]] && exit 0
  done < "$AUTH_PUB"
  exit 1
) || error "Authentication for user $AUTH_USER failed."
### Env setup
# Look the account up once; fields 6 and 7 of a passwd entry are the home
# directory and the login shell.
AUTH_USER_PASSWD=$(getent passwd "$AUTH_USER")
IFS=: read -r _ _ _ _ _ AUTH_USER_HOME AUTH_USER_SHELL <<< "$AUTH_USER_PASSWD"
# $WORK is resolved inside a login shell of the user, so it reflects whatever
# that user's login environment defines.
AUTH_USER_WORK=$(runuser "$AUTH_USER" --login --command "echo \$WORK")
[[ -n "$AUTH_USER_WORK" ]] || error "Could not determine $AUTH_USER's \$WORK directory."
# ${TMPDIR:?} aborts with a clear message instead of running a recursive chown
# on an empty or unset path.
chown -R "$AUTH_USER" "${TMPDIR:?}"
BASE_DIR=$AUTH_USER_WORK/gitlab-runner/builds/${CUSTOM_ENV_CI_CONCURRENT_PROJECT_ID:?}/${CUSTOM_ENV_CI_PROJECT_NAMESPACE:?}/${CUSTOM_ENV_CI_PROJECT_NAME:?}
# The command string is re-parsed by the user's shell, so the path is
# shell-quoted to survive whitespace or other special characters.
printf -v MKDIR_CMD 'mkdir -p %q' "$BASE_DIR"
runuser "$AUTH_USER" --login --command "$MKDIR_CMD"
#######################################
# Convert a Slurm time specification to whole minutes (seconds round up).
# Arguments: $1 - time in one of the formats accepted by `salloc --time`:
#                 minutes, minutes:seconds, hours:minutes:seconds,
#                 days-hours, days-hours:minutes, days-hours:minutes:seconds
# Outputs:   the number of minutes on stdout
# Returns:   1 if $1 matches none of these formats
#######################################
function slurm_time_to_minutes {
  local spec=$1 days=0 hours=0 minutes=0 seconds=0
  # Every field is capped at 7 digits so the arithmetic below cannot overflow.
  local num='[0-9]{1,7}'
  local re_days="^($num)-($num)(:($num)(:($num))?)?\$"
  local re_hms="^($num):($num):($num)\$"
  local re_min="^($num)(:($num))?\$"
  if [[ $spec =~ $re_days ]]; then
    days=${BASH_REMATCH[1]}
    hours=${BASH_REMATCH[2]}
    minutes=${BASH_REMATCH[4]:-0}
    seconds=${BASH_REMATCH[6]:-0}
  elif [[ $spec =~ $re_hms ]]; then
    hours=${BASH_REMATCH[1]}
    minutes=${BASH_REMATCH[2]}
    seconds=${BASH_REMATCH[3]}
  elif [[ $spec =~ $re_min ]]; then
    minutes=${BASH_REMATCH[1]}
    seconds=${BASH_REMATCH[3]:-0}
  else
    return 1
  fi
  # 10# forces base 10 so fields with leading zeros ("08") are not read as octal.
  echo $(( (10#$days * 24 + 10#$hours) * 60 + 10#$minutes + (10#$seconds + 59) / 60 ))
}

# $1 is the script to execute, $2 the name of the stage it belongs to.
if [[ ("${2:-}" == "step_script" || "${2:-}" == "build_script") && "${CUSTOM_ENV_NO_SLURM_SUBMIT:-}" != 1 ]]; then
  ## The script is the one specified in the gitlab-ci.yml script directive and
  ## Slurm submission has not been disabled (NO_SLURM_SUBMIT is not set to 1).

  # Re-export every CUSTOM_ENV_SLURM_* variable as SLURM_*. Iterating over the
  # variable names keeps values containing whitespace or newlines intact.
  # The expansion is deliberately unquoted: variable names cannot contain
  # whitespace or glob characters, and no match then yields no iteration.
  for NAME in ${!CUSTOM_ENV_SLURM_@}; do
    export "${NAME#CUSTOM_ENV_}=${!NAME}"
  done
  : "${SLURM_JOB_NAME:="gitlab-ci-${CUSTOM_ENV_CI_PROJECT_NAME:?}-${CUSTOM_ENV_CI_PIPELINE_ID:?}-${CUSTOM_ENV_CI_JOB_ID:?}"}"
  : "${SLURM_TIMELIMIT:=$MAX_CI_JOB_RUNTIME}"
  : "${SLURM_TIME:=$SLURM_TIMELIMIT}"
  : "${SLURM_NODELIST:="phinally"}" # default node: phinally

  # sinfo prints nothing but padding for a node it does not know. A failing
  # sinfo is tolerated here and simply leads to the "unknown node" error.
  KNOWN_NODES=$(sinfo -n "$SLURM_NODELIST" -h -O NodeList) || true
  if [[ -z "${KNOWN_NODES//[[:space:]]/}" ]]; then
    echo -e "${TXT_RED}${TXT_BOLD}Unknown node \"$SLURM_NODELIST\" specified. Available nodes: ${TXT_CLEAR}" >&2
    # Best effort: the listing is only a hint and must not mask the error below.
    sinfo -N -o '%N %c %m' >&2 || true
    error "Exiting..."
  fi

  SLURM_PARTITION=work
  export SLURM_PARTITION
  SLURM_NODES=1 # currently only individual nodes can be used
  export SLURM_NODES
  export SLURM_JOB_NAME
  export SLURM_TIME
  unset SLURM_TIMELIMIT
  export SLURM_NODELIST

  # limit max job run time
  # The comparison is numeric, in minutes (a bare number passed to
  # `salloc --time` is a minute count). A value that cannot be parsed, or 0
  # (which Slurm treats as "no limit"), is clamped as well.
  if ! REQUESTED_MINUTES=$(slurm_time_to_minutes "$SLURM_TIME") \
    || (( REQUESTED_MINUTES == 0 || REQUESTED_MINUTES > MAX_CI_JOB_RUNTIME )); then
    echo "SLURM_TIMELIMIT or SLURM_TIME invalid, unlimited or larger than $MAX_CI_JOB_RUNTIME (min), limiting to $MAX_CI_JOB_RUNTIME." 1>&2
    SLURM_TIME=$MAX_CI_JOB_RUNTIME
    export SLURM_TIME
  fi

  # Generate salloc arguments from SLURM_* environment variables:
  # SLURM_JOB_NAME=x becomes --job-name=x. The full value is kept, including
  # any "=" it contains.
  SALLOC_OPTIONS=()
  for NAME in ${!SLURM_@}; do
    OPTION=${NAME#SLURM_}
    OPTION=${OPTION//_/-}
    SALLOC_OPTIONS+=("--${OPTION,,}=${!NAME}")
  done

  # Copy the job script next to the build directory, as the target user.
  JOB_SCRIPT=$BASE_DIR.tmp/${CUSTOM_ENV_CI_JOB_ID:?}.sh
  printf -v COPY_CMD 'cp %q %q' "$1" "$JOB_SCRIPT"
  runuser --login "$AUTH_USER" --command "$COPY_CMD"

  # Write a wrapper that allocates the node and runs the job script in the
  # user's login shell. Every word is shell-quoted with %q because the option
  # values originate from CI/CD variables.
  {
    echo "#!/bin/bash -l"
    printf '%q ' salloc --quiet --chdir "$BASE_DIR" "${SALLOC_OPTIONS[@]}" \
      srun --cpu-bind none --wait 0 --kill-on-bad-exit=1 \
      "$AUTH_USER_SHELL" --login "$JOB_SCRIPT"
    echo
  } > "$TMPDIR"/salloc.sh
  chmod +x "$TMPDIR"/salloc.sh
  echo -e "${TXT_GREEN}${TXT_BOLD}Submitting job to node $SLURM_NODELIST...${TXT_CLEAR}"
  exec runuser --login "$AUTH_USER" --command "$TMPDIR"/salloc.sh
else
  # Any other stage (or Slurm submission disabled): run the script directly
  # in the build directory as the target user.
  printf -v RUN_CMD 'cd %q; bash -l %q' "$BASE_DIR" "$1"
  runuser --login "$AUTH_USER" --command "$RUN_CMD"
fi