Skip to content

Commit

Permalink
Merge pull request #437 from boegel/2023.06-software.eessi.io_link_nv…
Browse files Browse the repository at this point in the history
…idia_host_libraries_fix

make `link_nvidia_host_libraries.sh` script a bit more robust, in case target of host_injections directory is a non-existing directory
  • Loading branch information
casparvl authored Dec 22, 2023
2 parents 5c322b0 + ac53cf0 commit 44b563c
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 39 deletions.
75 changes: 40 additions & 35 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -203,42 +203,47 @@ ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12
# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh

# use PR patch file to determine in which easystack files stuff was added
# NOTE(review): this loop has no closing `fi`/`done` of its own before the
# `changed_easystacks=` assignment that follows; it looks like the pre-change
# side of the diff hunk — confirm before treating it as live code.
# Iterate over the easystack files touched by the PR: take the '+++' header
# lines of the diff, keep the path field, strip the one-letter prefix (such as
# 'b/'), keep only easystacks/*.yml and drop known-issues/missing files.
for easystack_file in $(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing'); do

echo -e "Processing easystack file ${easystack_file}...\n\n"

# determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g')

# load EasyBuild module (will be installed if it's not available yet)
source ${TOPDIR}/load_easybuild_module.sh ${eb_version}

# print the EasyBuild configuration that is in effect
${EB} --show-config

echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..."

# NOTE(review): existence is tested relative to the current directory, while
# EasyBuild is handed ${TOPDIR}/${easystack_file}; the two agree only when the
# working directory is ${TOPDIR} — confirm.
if [ -f ${easystack_file} ]; then
echo_green "Feeding easystack file ${easystack_file} to EasyBuild..."

${EB} --easystack ${TOPDIR}/${easystack_file} --robot
# save the exit code right away; the next command would overwrite $?
ec=$?

# copy EasyBuild log file if EasyBuild exited with an error
if [ ${ec} -ne 0 ]; then
# EB_VERBOSE is unset only inside the command substitution's subshell
eb_last_log=$(unset EB_VERBOSE; eb --last-log)
# copy to current working directory
cp -a ${eb_last_log} .
echo "Last EasyBuild log file copied from ${eb_last_log} to ${PWD}"
# copy to build logs dir (with context added)
copy_build_log "${eb_last_log}" "${build_logs_dir}"
# Collect the easystack files touched by the PR: take the '+++' header lines of
# the diff, keep the path field, strip the one-letter prefix (such as 'b/'),
# keep only easystacks/*.yml and drop known-issues/missing files.
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing')
# NOTE(review): ${changed_easystacks} is unquoted here; when it holds more than
# one file name, `[ -z a b ]` is a malformed test that prints an error and only
# reaches the else branch because the test fails — consider quoting it.
if [ -z ${changed_easystacks} ]; then
echo "No missing installations, party time!" # Ensure the bot reports success, as there was nothing to be built here
else
# the list is expanded unquoted so that it word-splits into one file per iteration
for easystack_file in ${changed_easystacks}; do

echo -e "Processing easystack file ${easystack_file}...\n\n"

# determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g')

# load EasyBuild module (will be installed if it's not available yet)
source ${TOPDIR}/load_easybuild_module.sh ${eb_version}

# print the EasyBuild configuration that is in effect
${EB} --show-config

echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..."

# NOTE(review): existence is tested relative to the current directory, while
# EasyBuild is handed ${TOPDIR}/${easystack_file}; the two agree only when the
# working directory is ${TOPDIR} — confirm.
if [ -f ${easystack_file} ]; then
echo_green "Feeding easystack file ${easystack_file} to EasyBuild..."

${EB} --easystack ${TOPDIR}/${easystack_file} --robot
# save the exit code right away; the next command would overwrite $?
ec=$?

# copy EasyBuild log file if EasyBuild exited with an error
if [ ${ec} -ne 0 ]; then
# EB_VERBOSE is unset only inside the command substitution's subshell
eb_last_log=$(unset EB_VERBOSE; eb --last-log)
# copy to current working directory
cp -a ${eb_last_log} .
echo "Last EasyBuild log file copied from ${eb_last_log} to ${PWD}"
# copy to build logs dir (with context added)
copy_build_log "${eb_last_log}" "${build_logs_dir}"
fi

# run check_missing_installations.sh on the easystack file that was just processed
$TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file}
else
fatal_error "Easystack file ${easystack_file} not found!"
fi

# NOTE(review): the statements from here up to the first `done` repeat the
# lines just above and do not pair with any open `if`; they look like
# removed-side residue of the diff rendering — confirm before running this.
$TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file}
else
fatal_error "Easystack file ${easystack_file} not found!"
fi

done

done
fi

### add packages here

Expand Down
16 changes: 12 additions & 4 deletions scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ export LD_LIBRARY_PATH=/.singularity.d/libs:$LD_LIBRARY_PATH
# Command that asks nvidia-smi for the driver version only, as CSV without a header row
nvidia_smi_command="nvidia-smi --query-gpu=driver_version --format=csv,noheader"
# The command string is expanded unquoted, so it word-splits into the program and its arguments;
# its exit status decides whether a working NVIDIA driver is considered available
if $nvidia_smi_command > /dev/null; then
# keep only the last line of the output (presumably one line is printed per GPU — TODO confirm)
host_driver_version=$($nvidia_smi_command | tail -n1)
echo_green "Found NVIDIA GPU driver version ${host_driver_version}"
# If the first worked, this should work too
# take the last field of the line(s) containing "CUDA" in the COMPUTE section of `nvidia-smi -q`
host_cuda_version=$(nvidia-smi -q --display=COMPUTE | grep CUDA | awk 'NF>1{print $NF}')
echo_green "Found host CUDA version ${host_cuda_version}"
else
# nvidia-smi could not be run successfully: report the failing command and stop
error="Failed to successfully execute\n $nvidia_smi_command\n"
fatal_error "$error"
Expand All @@ -58,12 +60,18 @@ fi
# Let's make sure the driver libraries are not already in place
link_drivers=1

# First make sure that the target of the host_injections variant symlink is an
# existing directory. `realpath -m` resolves the path even when some components
# do not exist, so a symlink pointing at a missing directory still yields the
# path that has to be created.
# All expansions are quoted so that a path containing whitespace or glob
# characters is passed on as a single, literal argument.
host_injections_target=$(realpath -m "${EESSI_CVMFS_REPO}/host_injections")
if [ ! -d "${host_injections_target}" ]; then
  create_directory_structure "${host_injections_target}"
fi

# Locations below host_injections: the per-CPU-family NVIDIA directory, its
# "host" subdirectory, and the file in which a driver version is recorded
host_injections_nvidia_dir="${EESSI_CVMFS_REPO}/host_injections/nvidia/${EESSI_CPU_FAMILY}"
host_injection_driver_dir="${host_injections_nvidia_dir}/host"
host_injection_driver_version_file="$host_injection_driver_dir/driver_version.txt"
if [ -e "$host_injection_driver_version_file" ]; then
# NOTE(review): the version is used as a grep regular expression, so its dots
# match any character; `grep -qF` would compare it literally — confirm intent.
if grep -q "$host_driver_version" "$host_injection_driver_version_file"; then
# NOTE(review): the two messages below look like the old and the new wording of
# the same message as rendered by the diff view — confirm which one is live.
echo_green "The host CUDA driver libraries have already been linked!"
echo_green "The host GPU driver libraries (v${host_driver_version}) have already been linked! (based on ${host_injection_driver_version_file})"
# the recorded version matches the host driver, so linking is skipped
link_drivers=0
else
# There's something there but it is out of date
Expand Down Expand Up @@ -91,8 +99,8 @@ if [ "$link_drivers" -eq 1 ]; then
# append the libraries found under /.singularity.d/libs to the list of candidates;
# errors (for instance when that directory does not exist) are discarded
ls /.singularity.d/libs/* >> "$temp_dir"/libs.txt 2>/dev/null

# Leverage singularity to find the full list of libraries we should be linking to
# NOTE(review): the two echo/curl pairs below look like the old and the new
# version of the same download step as rendered by the diff view — confirm
# which one is live. Neither checks curl's exit status.
echo_yellow "Downloading latest version of nvliblist.conf from Apptainer"
curl -o "$temp_dir"/nvliblist.conf https://raw.githubusercontent.com/apptainer/apptainer/main/etc/nvliblist.conf
echo_yellow "Downloading latest version of nvliblist.conf from Apptainer to ${temp_dir}/nvliblist.conf"
curl --silent --output "$temp_dir"/nvliblist.conf https://raw.githubusercontent.com/apptainer/apptainer/main/etc/nvliblist.conf

# Make symlinks to all the interesting libraries
# For every entry of nvliblist.conf ending in "so" (the '.' in '.so$' is an
# unescaped regex dot), look up matching paths in libs.txt and symlink each
# match into the current directory.
grep '.so$' "$temp_dir"/nvliblist.conf | xargs -i grep {} "$temp_dir"/libs.txt | xargs -i ln -s {}
Expand Down Expand Up @@ -133,4 +141,4 @@ else
# create a symlink named "lib" in the current directory that points at the "latest" entry
ln -s $host_injections_nvidia_dir/latest lib
fi

# NOTE(review): the two messages below differ only in the capitalisation of
# "GPU" and look like the old and the new line of the diff — confirm which one is live.
echo_green "Host NVIDIA gpu drivers linked successfully for EESSI"
echo_green "Host NVIDIA GPU drivers linked successfully for EESSI"

0 comments on commit 44b563c

Please sign in to comment.