From 553d9867cc2bc5693b9e9f98650f02fb68741640 Mon Sep 17 00:00:00 2001 From: Althea Denlinger Date: Thu, 25 Jul 2024 09:41:11 -0700 Subject: [PATCH 1/5] Add spack files for `chicoma-cpu` with `nvidia` --- mache/spack/chicoma-cpu_nvidia_mpich.csh | 49 +++++++ mache/spack/chicoma-cpu_nvidia_mpich.sh | 49 +++++++ mache/spack/chicoma-cpu_nvidia_mpich.yaml | 158 ++++++++++++++++++++++ 3 files changed, 256 insertions(+) create mode 100644 mache/spack/chicoma-cpu_nvidia_mpich.csh create mode 100644 mache/spack/chicoma-cpu_nvidia_mpich.sh create mode 100644 mache/spack/chicoma-cpu_nvidia_mpich.yaml diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.csh b/mache/spack/chicoma-cpu_nvidia_mpich.csh new file mode 100644 index 0000000..a1bd109 --- /dev/null +++ b/mache/spack/chicoma-cpu_nvidia_mpich.csh @@ -0,0 +1,49 @@ +setenv http_proxy http://proxyout.lanl.gov:8080/ +setenv https_proxy http://proxyout.lanl.gov:8080/ +setenv ftp_proxy http://proxyout.lanl.gov:8080 +setenv HTTP_PROXY http://proxyout.lanl.gov:8080 +setenv HTTPS_PROXY http://proxyout.lanl.gov:8080 +setenv FTP_PROXY http://proxyout.lanl.gov:8080 + +source /usr/share/lmod/8.3.1/init/csh + +module rm PrgEnv-gnu +module rm PrgEnv-nvidia +module rm PrgEnv-cray +module rm PrgEnv-aocc +module rm craype-accel-nvidia80 +module rm craype-accel-host + +module load PrgEnv-nvidia +module load nvidia/22.7 +module load craype-x86-milan +module load libfabric/1.15.2.0 +module load craype-accel-host +module load craype +module load cray-mpich/8.1.26 +{% if e3sm_lapack %} +module load cray-libsci/23.02.1.1 +{% endif %} +{% if e3sm_hdf5_netcdf %} +module rm cray-hdf5-parallel +module rm cray-netcdf-hdf5parallel +module rm cray-parallel-netcdf +module load cray-hdf5-parallel/1.12.2.3 +module load cray-netcdf-hdf5parallel/4.9.0.3 +module load cray-parallel-netcdf/1.12.3.3 +{% endif %} + +setenv MPICH_ENV_DISPLAY 1 +setenv MPICH_VERSION_DISPLAY 1 +## purposefully omitting OMP variables that cause trouble in ESMF +# setenv OMP_STACKSIZE 128M +# setenv OMP_PROC_BIND spread +# setenv OMP_PLACES threads +setenv HDF5_USE_FILE_LOCKING FALSE +setenv PERL5LIB /usr/projects/climate/SHARED_CLIMATE/software/chicoma-cpu/perl5-only-switch/lib/perl5 +setenv PNETCDF_HINTS "romio_ds_write=disable;romio_ds_read=disable;romio_cb_write=enable;romio_cb_read=enable" +setenv MPICH_COLL_SYNC MPI_Bcast +setenv MPICH_GPU_SUPPORT_ENABLED 1 + +setenv LD_LIBRARY_PATH $CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH +setenv BLA_VENDOR NVHPC diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.sh b/mache/spack/chicoma-cpu_nvidia_mpich.sh new file mode 100644 index 0000000..9f58164 --- /dev/null +++ b/mache/spack/chicoma-cpu_nvidia_mpich.sh @@ -0,0 +1,49 @@ +export http_proxy=http://proxyout.lanl.gov:8080/ +export https_proxy=http://proxyout.lanl.gov:8080/ +export ftp_proxy=http://proxyout.lanl.gov:8080 +export HTTP_PROXY=http://proxyout.lanl.gov:8080 +export HTTPS_PROXY=http://proxyout.lanl.gov:8080 +export FTP_PROXY=http://proxyout.lanl.gov:8080 + +source /usr/share/lmod/8.3.1/init/sh + +module rm PrgEnv-gnu +module rm PrgEnv-nvidia +module rm PrgEnv-cray +module rm PrgEnv-aocc +module rm craype-accel-nvidia80 +module rm craype-accel-host + +module load PrgEnv-nvidia +module load nvidia/22.7 +module load craype-x86-milan +module load libfabric/1.15.2.0 +module load craype-accel-host +module load craype +module load cray-mpich/8.1.26 +{% if e3sm_lapack %} +module load cray-libsci/23.02.1.1 +{% endif %} +{% if e3sm_hdf5_netcdf %} +module rm cray-hdf5-parallel +module rm cray-netcdf-hdf5parallel +module rm cray-parallel-netcdf +module load cray-hdf5-parallel/1.12.2.3 +module load cray-netcdf-hdf5parallel/4.9.0.3 +module load cray-parallel-netcdf/1.12.3.3 +{% endif %} + +export MPICH_ENV_DISPLAY=1 +export MPICH_VERSION_DISPLAY=1 +## purposefully omitting OMP variables that cause trouble in ESMF +# export OMP_STACKSIZE=128M +# export OMP_PROC_BIND=spread +# export OMP_PLACES=threads +export HDF5_USE_FILE_LOCKING=FALSE +export PERL5LIB=/usr/projects/climate/SHARED_CLIMATE/software/chicoma-cpu/perl5-only-switch/lib/perl5 +export PNETCDF_HINTS="romio_ds_write=disable;romio_ds_read=disable;romio_cb_write=enable;romio_cb_read=enable" +export MPICH_COLL_SYNC=MPI_Bcast +export MPICH_GPU_SUPPORT_ENABLED=1 + +export LD_LIBRARY_PATH=$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH +export BLA_VENDOR=NVHPC diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.yaml b/mache/spack/chicoma-cpu_nvidia_mpich.yaml new file mode 100644 index 0000000..eede72e --- /dev/null +++ b/mache/spack/chicoma-cpu_nvidia_mpich.yaml @@ -0,0 +1,158 @@ +spack: + specs: + - cray-mpich +{% if e3sm_lapack %} + - cray-libsci +{% endif %} +{% if e3sm_hdf5_netcdf %} + - hdf5 + - netcdf-c + - netcdf-fortran + - parallel-netcdf +{% endif %} +{{ specs }} + concretizer: + unify: true + packages: + all: + compiler: [nvhpc@22.7] + providers: + mpi: [cray-mpich@8.1.26] +{% if e3sm_lapack %} + lapack: [cray-libsci@23.05.1.4] +{% endif %} + bzip2: + externals: + - spec: bzip2@1.0.6 + prefix: /usr + buildable: false + curl: + externals: + - spec: curl@7.60.0 + prefix: /usr + buildable: false + diffutils: + externals: + - spec: diffutils@3.6 + prefix: /usr + buildable: false + gettext: + externals: + - spec: gettext@0.19.8.1 + prefix: /usr + buildable: false + gmake: + externals: + - spec: gmake@4.2.1 + prefix: /usr + buildable: false + libiconv: + externals: + - spec: libiconv@2.31 + prefix: /usr + buildable: false + libxml2: + externals: + - spec: libxml2@2.9.7 + prefix: /usr + buildable: false + ncurses: + externals: + - spec: ncurses@6.1 + prefix: /usr + buildable: false + openssl: + externals: + - spec: openssl@1.1.0i + prefix: /usr + buildable: false + perl: + externals: + - spec: perl@5.26.1 + prefix: /usr + buildable: false + tar: + externals: + - spec: tar@1.30 + prefix: /usr + buildable: false + xz: + externals: + - spec: xz@5.2.3 + prefix: /usr + buildable: false + python: + externals: + - spec: python@3.10.9 + prefix: /usr/projects/hpcsoft/common/x86_64/anaconda/2023.03-python-3.10 + modules: + - python/3.10-anaconda-2023.03 + buildable: false + cray-mpich: + externals: + - spec: cray-mpich@8.1.26 + prefix: /opt/cray/pe/mpich/8.1.25/ofi/nvidia/20.7 + modules: + - libfabric/1.15.2.0 + - cray-mpich/8.1.26 + buildable: false + libfabric: + externals: + - spec: libfabric@1.15.2.0 + prefix: /opt/cray/libfabric/1.15.2.0 + modules: + - libfabric/1.15.2.0 + buildable: false +{% if e3sm_lapack %} + cray-libsci: + externals: + - spec: cray-libsci@23.05.1.4 + prefix: /opt/cray/pe/libsci/23.02.1.1/NVIDIA/20.7/x86_64 + buildable: false +{% endif %} +{% if e3sm_hdf5_netcdf or system_hdf5_netcdf %} + hdf5: + externals: + - spec: hdf5@1.12.2.3~cxx+fortran+hl~java+mpi+shared + prefix: /opt/cray/pe/hdf5-parallel/1.12.2.3/nvidia/20.7 + modules: + - cray-hdf5-parallel/1.12.2.3 + buildable: false + parallel-netcdf: + externals: + - spec: parallel-netcdf@1.12.3.3+cxx+fortran+pic+shared + prefix: /opt/cray/pe/parallel-netcdf/1.12.3.3/nvidia/20.7 + buildable: false + netcdf-c: + externals: + - spec: netcdf-c@4.9.0.3+mpi~parallel-netcdf + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/nvidia/20.7 + buildable: false + netcdf-fortran: + externals: + - spec: netcdf-fortran@4.5.3 + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/nvidia/20.7 + buildable: false +{% endif %} + config: + install_missing_compilers: false + compilers: + - compiler: + spec: nvhpc@22.7 + paths: + cc: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvc + cxx: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvc++ + f77: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvfortran + fc: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvfortran + flags: {} + operating_system: sles15 + target: any + modules: + - PrgEnv-nvidia + - nvidia/22.7 + - craype-x86-milan + - craype-accel-host + - libfabric/1.15.2.0 + environment: + prepend_path: + PKG_CONFIG_PATH: "/opt/cray/xpmem/2.5.2-2.4_3.45__gd0f7936.shasta/lib64/pkgconfig" From 4d6c630b931f9919007c2ddaa0fb688f56df61f0 Mon Sep 17 00:00:00 2001 From: Althea Denlinger Date: Thu, 25 Jul 2024 09:41:47 -0700 Subject: [PATCH 2/5] Fix typo and redundant variable definition --- mache/spack/pm-cpu_nvidia_mpich.sh | 1 - mache/spack/pm-cpu_nvidia_mpich.yaml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/mache/spack/pm-cpu_nvidia_mpich.sh b/mache/spack/pm-cpu_nvidia_mpich.sh index 5d3af38..8e51d47 100644 --- a/mache/spack/pm-cpu_nvidia_mpich.sh +++ b/mache/spack/pm-cpu_nvidia_mpich.sh @@ -59,7 +59,6 @@ if [ -z "${NERSC_HOST:-}" ]; then # happens when building spack environment export NERSC_HOST="perlmutter" fi -export MPICH_GPU_SUPPORT_ENABLED=1 export MPICH_COLL_SYNC=MPI_Bcast export GATOR_INITIAL_MB=4000MB export BLA_VENDOR=NVHPC diff --git a/mache/spack/pm-cpu_nvidia_mpich.yaml b/mache/spack/pm-cpu_nvidia_mpich.yaml index fe17a1c..fd52827 100644 --- a/mache/spack/pm-cpu_nvidia_mpich.yaml +++ b/mache/spack/pm-cpu_nvidia_mpich.yaml @@ -120,7 +120,7 @@ spack: prefix: /opt/cray/pe/libsci/23.02.1.1/NVIDIA/20.7/x86_64 buildable: false {% endif %} -{% if e3sm_hdf5_netcdf or system_hdf5_netcdf%} +{% if e3sm_hdf5_netcdf or system_hdf5_netcdf %} hdf5: externals: - spec: hdf5@1.12.2.3~cxx+fortran+hl~java+mpi+shared From 971f6c8e05fc1a81842724b03e934701f39125d1 Mon Sep 17 00:00:00 2001 From: Althea Denlinger Date: Thu, 25 Jul 2024 10:34:50 -0700 Subject: [PATCH 3/5] Update version numbers --- mache/spack/chicoma-cpu_nvidia_mpich.csh | 4 ++-- mache/spack/chicoma-cpu_nvidia_mpich.sh | 4 ++-- mache/spack/chicoma-cpu_nvidia_mpich.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.csh b/mache/spack/chicoma-cpu_nvidia_mpich.csh index a1bd109..5802ced 100644 --- a/mache/spack/chicoma-cpu_nvidia_mpich.csh +++ b/mache/spack/chicoma-cpu_nvidia_mpich.csh @@ -14,7 +14,7 @@ module rm PrgEnv-aocc module rm craype-accel-nvidia80 module rm craype-accel-host -module load PrgEnv-nvidia +module load PrgEnv-nvidia/8.4.0 module load nvidia/22.7 module load craype-x86-milan module load libfabric/1.15.2.0 @@ -22,7 +22,7 @@ module load craype-accel-host module load craype module load cray-mpich/8.1.26 {% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 +module load cray-libsci/23.05.1.4 {% endif %} {% if e3sm_hdf5_netcdf %} module rm cray-hdf5-parallel diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.sh b/mache/spack/chicoma-cpu_nvidia_mpich.sh index 9f58164..eff40ba 100644 --- a/mache/spack/chicoma-cpu_nvidia_mpich.sh +++ b/mache/spack/chicoma-cpu_nvidia_mpich.sh @@ -14,7 +14,7 @@ module rm PrgEnv-aocc module rm craype-accel-nvidia80 module rm craype-accel-host -module load PrgEnv-nvidia +module load PrgEnv-nvidia/8.4.0 module load nvidia/22.7 module load craype-x86-milan module load libfabric/1.15.2.0 @@ -22,7 +22,7 @@ module load craype-accel-host module load craype module load cray-mpich/8.1.26 {% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 +module load cray-libsci/23.05.1.4 {% endif %} {% if e3sm_hdf5_netcdf %} module rm cray-hdf5-parallel diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.yaml b/mache/spack/chicoma-cpu_nvidia_mpich.yaml index eede72e..331f617 100644 --- a/mache/spack/chicoma-cpu_nvidia_mpich.yaml +++ b/mache/spack/chicoma-cpu_nvidia_mpich.yaml @@ -107,7 +107,7 @@ spack: cray-libsci: externals: - spec: cray-libsci@23.05.1.4 - prefix: /opt/cray/pe/libsci/23.02.1.1/NVIDIA/20.7/x86_64 + prefix: /opt/cray/pe/libsci/23.05.1.4/NVIDIA/20.7/x86_64 buildable: false {% endif %} {% if e3sm_hdf5_netcdf or system_hdf5_netcdf %} @@ -148,7 +148,7 @@ spack: operating_system: sles15 target: any modules: - - PrgEnv-nvidia + - PrgEnv-nvidia/8.4.0 - nvidia/22.7 - craype-x86-milan - craype-accel-host From 3485442702cac5f1885b7fbd355f65df1f3963ce Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Fri, 26 Jul 2024 02:40:09 -0600 Subject: [PATCH 4/5] A few fixes from @xylar's code review --- mache/spack/chicoma-cpu_nvidia_mpich.csh | 1 - mache/spack/chicoma-cpu_nvidia_mpich.sh | 1 - mache/spack/chicoma-cpu_nvidia_mpich.yaml | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.csh b/mache/spack/chicoma-cpu_nvidia_mpich.csh index 5802ced..ff595fc 100644 --- a/mache/spack/chicoma-cpu_nvidia_mpich.csh +++ b/mache/spack/chicoma-cpu_nvidia_mpich.csh @@ -43,7 +43,6 @@ setenv HDF5_USE_FILE_LOCKING FALSE setenv PERL5LIB /usr/projects/climate/SHARED_CLIMATE/software/chicoma-cpu/perl5-only-switch/lib/perl5 setenv PNETCDF_HINTS "romio_ds_write=disable;romio_ds_read=disable;romio_cb_write=enable;romio_cb_read=enable" setenv MPICH_COLL_SYNC MPI_Bcast -setenv MPICH_GPU_SUPPORT_ENABLED 1 setenv LD_LIBRARY_PATH $CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH setenv BLA_VENDOR NVHPC diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.sh b/mache/spack/chicoma-cpu_nvidia_mpich.sh index eff40ba..b0e6ed5 100644 --- a/mache/spack/chicoma-cpu_nvidia_mpich.sh +++ b/mache/spack/chicoma-cpu_nvidia_mpich.sh @@ -43,7 +43,6 @@ export HDF5_USE_FILE_LOCKING=FALSE export PERL5LIB=/usr/projects/climate/SHARED_CLIMATE/software/chicoma-cpu/perl5-only-switch/lib/perl5 export PNETCDF_HINTS="romio_ds_write=disable;romio_ds_read=disable;romio_cb_write=enable;romio_cb_read=enable" export MPICH_COLL_SYNC=MPI_Bcast -export MPICH_GPU_SUPPORT_ENABLED=1 export LD_LIBRARY_PATH=$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH export BLA_VENDOR=NVHPC diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.yaml b/mache/spack/chicoma-cpu_nvidia_mpich.yaml index 331f617..1fce056 100644 --- a/mache/spack/chicoma-cpu_nvidia_mpich.yaml +++ b/mache/spack/chicoma-cpu_nvidia_mpich.yaml @@ -91,7 +91,7 @@ spack: cray-mpich: externals: - spec: cray-mpich@8.1.26 - prefix: /opt/cray/pe/mpich/8.1.25/ofi/nvidia/20.7 + prefix: /opt/cray/pe/mpich/8.1.26/ofi/nvidia/20.7 modules: - libfabric/1.15.2.0 - cray-mpich/8.1.26 From da6359501166e81c4df584f2ec699a10ebaa7453 Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Fri, 26 Jul 2024 04:23:38 -0600 Subject: [PATCH 5/5] Add cmake to chicoma-cpu nvidia --- mache/spack/chicoma-cpu_nvidia_mpich.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mache/spack/chicoma-cpu_nvidia_mpich.yaml b/mache/spack/chicoma-cpu_nvidia_mpich.yaml index 1fce056..0a131bb 100644 --- a/mache/spack/chicoma-cpu_nvidia_mpich.yaml +++ b/mache/spack/chicoma-cpu_nvidia_mpich.yaml @@ -26,6 +26,11 @@ spack: - spec: bzip2@1.0.6 prefix: /usr buildable: false + cmake: + externals: + - spec: cmake@3.25.1 + prefix: /usr/projects/hpcsoft/cos2/common/x86_64/cmake/3.25.1 + buildable: false curl: externals: - spec: curl@7.60.0