From ece834ccbaeaef0450985c552791b3a8a4f3c8f7 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Mon, 8 Jan 2024 09:45:28 +0100 Subject: [PATCH 01/15] Add arch files for Daint, MLux and LUMI. --- arch/cscs/daint/intel/6.0.10/env.sh | 41 +++++ arch/cscs/daint/intel/6.0.10/toolchain.cmake | 149 ++++++++++++++++++ arch/cscs/daint/nvidia/6.0.10/env.sh | 45 ++++++ arch/cscs/daint/nvidia/6.0.10/toolchain.cmake | 53 +++++++ arch/eurohpc/leonardo/nvhpc/23.1/env.sh | 49 ++++++ .../leonardo/nvhpc/23.1/toolchain.cmake | 57 +++++++ arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh | 44 ++++++ .../lumi/amd-gpu/8.3.3/toolchain.cmake | 31 ++++ arch/eurohpc/lumi/amd-host/8.3.3/env.sh | 43 +++++ .../lumi/amd-host/8.3.3/toolchain.cmake | 25 +++ arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh | 49 ++++++ .../lumi/cray-gpu/14.0.2/toolchain.cmake | 42 +++++ arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh | 51 ++++++ .../lumi/cray-gpu/15.0.1/toolchain.cmake | 58 +++++++ arch/eurohpc/lumi/cray-host/14.0.2/env.sh | 42 +++++ .../lumi/cray-host/14.0.2/toolchain.cmake | 35 ++++ arch/eurohpc/meluxina/nvhpc/21.11/env.sh | 58 +++++++ .../meluxina/nvhpc/21.11/toolchain.cmake | 57 +++++++ arch/eurohpc/meluxina/nvhpc/22.3/env.sh | 56 +++++++ .../meluxina/nvhpc/22.3/toolchain.cmake | 57 +++++++ arch/eurohpc/meluxina/nvhpc/22.7/env.sh | 52 ++++++ .../meluxina/nvhpc/22.7/toolchain.cmake | 57 +++++++ 22 files changed, 1151 insertions(+) create mode 100644 arch/cscs/daint/intel/6.0.10/env.sh create mode 100644 arch/cscs/daint/intel/6.0.10/toolchain.cmake create mode 100644 arch/cscs/daint/nvidia/6.0.10/env.sh create mode 100644 arch/cscs/daint/nvidia/6.0.10/toolchain.cmake create mode 100644 arch/eurohpc/leonardo/nvhpc/23.1/env.sh create mode 100644 arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake create mode 100644 arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh create mode 100644 arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake create mode 100644 arch/eurohpc/lumi/amd-host/8.3.3/env.sh create mode 100644 
arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake create mode 100644 arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh create mode 100644 arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake create mode 100644 arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh create mode 100644 arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake create mode 100644 arch/eurohpc/lumi/cray-host/14.0.2/env.sh create mode 100644 arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake create mode 100644 arch/eurohpc/meluxina/nvhpc/21.11/env.sh create mode 100644 arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake create mode 100644 arch/eurohpc/meluxina/nvhpc/22.3/env.sh create mode 100644 arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake create mode 100644 arch/eurohpc/meluxina/nvhpc/22.7/env.sh create mode 100644 arch/eurohpc/meluxina/nvhpc/22.7/toolchain.cmake diff --git a/arch/cscs/daint/intel/6.0.10/env.sh b/arch/cscs/daint/intel/6.0.10/env.sh new file mode 100644 index 0000000..9294001 --- /dev/null +++ b/arch/cscs/daint/intel/6.0.10/env.sh @@ -0,0 +1,41 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module purge -f + +# Load modules +module load daint-gpu +module load PrgEnv-intel/6.0.10 +module swap intel/2021.3.0 intel-classic/2022.1.0 +#module load Boost +module load CMake +module load cudatoolkit/11.2.0_3.39-2.1__gf93aa1c # needed for cmake to find hdf5 +module load cray-hdf5-parallel +module load cray-python + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/cscs/daint/intel/6.0.10/toolchain.cmake b/arch/cscs/daint/intel/6.0.10/toolchain.cmake new file mode 100644 index 0000000..0b4f921 --- /dev/null +++ b/arch/cscs/daint/intel/6.0.10/toolchain.cmake @@ -0,0 +1,149 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +#################################################################### +# ARCHITECTURE +#################################################################### + +set( EC_HAVE_C_INLINE 1 ) +set( EC_HAVE_FUNCTION_DEF 1 ) +set( EC_HAVE_CXXABI_H 1 ) +set( EC_HAVE_CXX_BOOL 1 ) +set( EC_HAVE_CXX_SSTREAM 1 ) +set( EC_HAVE_CXX_INT_128 0 ) +set( CMAKE_SIZEOF_VOID_P 8 ) +set( EC_SIZEOF_PTR 8 ) +set( EC_SIZEOF_CHAR 1 ) +set( EC_SIZEOF_SHORT 2 ) +set( EC_SIZEOF_INT 4 ) +set( EC_SIZEOF_LONG 8 ) +set( EC_SIZEOF_LONG_LONG 8 ) +set( EC_SIZEOF_FLOAT 4 ) +set( EC_SIZEOF_DOUBLE 8 ) +set( EC_SIZEOF_LONG_DOUBLE 8 ) +set( EC_SIZEOF_SIZE_T 8 ) +set( EC_SIZEOF_SSIZE_T 8 ) +set( EC_SIZEOF_OFF_T 8 ) +set( EC_BIG_ENDIAN 0 ) +set( EC_LITTLE_ENDIAN 1 ) +set( IEEE_BE 0 ) +set( IEEE_LE 1 ) +set( EC_HAVE_FSEEK 1 ) +set( EC_HAVE_FSEEKO 1 ) +set( EC_HAVE_FTELLO 1 ) +set( EC_HAVE_LSEEK 0 ) +set( EC_HAVE_FTRUNCATE 0 ) +set( EC_HAVE_OPEN 0 ) +set( EC_HAVE_FOPEN 1 ) +set( EC_HAVE_FMEMOPEN 1 ) +set( EC_HAVE_FUNOPEN 0 ) +set( EC_HAVE_FLOCK 1 ) +set( EC_HAVE_MMAP 1 ) +set( EC_HAVE_POSIX_MEMALIGN 1 ) +set( EC_HAVE_F_GETLK 1 ) +set( EC_HAVE_F_SETLK 1 ) +set( EC_HAVE_F_SETLKW 1 ) +set( EC_HAVE_F_GETLK64 1 ) +set( EC_HAVE_F_SETLK64 1 ) +set( EC_HAVE_F_SETLKW64 1 ) +set( EC_HAVE_MAP_ANONYMOUS 1 ) +set( EC_HAVE_MAP_ANON 1 ) +set( EC_HAVE_ASSERT_H 1 ) +set( EC_HAVE_STDLIB_H 1 ) +set( EC_HAVE_UNISTD_H 1 ) +set( EC_HAVE_STRING_H 1 ) +set( EC_HAVE_STRINGS_H 1 ) +set( EC_HAVE_SYS_STAT_H 1 ) +set( EC_HAVE_SYS_TIME_H 1 ) +set( EC_HAVE_SYS_TYPES_H 1 ) +set( EC_HAVE_MALLOC_H 1 ) +set( EC_HAVE_SYS_MALLOC_H 0 ) +set( EC_HAVE_SYS_PARAM_H 1 ) +set( EC_HAVE_SYS_MOUNT_H 1 ) +set( EC_HAVE_SYS_VFS_H 1 ) +set( EC_HAVE_OFFT 1 ) +set( EC_HAVE_OFF64T 1 ) +set( EC_HAVE_STRUCT_STAT 1 ) +set( EC_HAVE_STRUCT_STAT64 1 ) +set( EC_HAVE_STAT 1 ) +set( EC_HAVE_STAT64 1 ) +set( EC_HAVE_FSTAT 1 ) +set( EC_HAVE_FSTAT64 1 ) +set( EC_HAVE_FSEEKO64 1 ) +set( EC_HAVE_FTELLO64 1 ) +set( EC_HAVE_LSEEK64 1 ) +set( EC_HAVE_OPEN64 1 ) +set( 
EC_HAVE_FOPEN64 1 ) +set( EC_HAVE_FTRUNCATE64 1 ) +set( EC_HAVE_FLOCK64 1 ) +set( EC_HAVE_MMAP64 1 ) +set( EC_HAVE_STRUCT_STATVFS 1 ) +set( EC_HAVE_STRUCT_STATVFS64 1 ) +set( EC_HAVE_FOPENCOOKIE 1 ) +set( EC_HAVE_FSYNC 1 ) +set( EC_HAVE_FDATASYNC 1 ) +set( EC_HAVE_DIRFD 1 ) +set( EC_HAVE_SYSPROC 0 ) +set( EC_HAVE_SYSPROCFS 1 ) +set( EC_HAVE_EXECINFO_BACKTRACE 1 ) +set( EC_HAVE_GMTIME_R 1 ) +set( EC_HAVE_GETPWUID_R 1 ) +set( EC_HAVE_GETPWNAM_R 1 ) +set( EC_HAVE_READDIR_R 1 ) +set( EC_HAVE_DIRENT_D_TYPE 1 ) +set( EC_HAVE_GETHOSTBYNAME_R 1 ) +set( EC_HAVE_ATTRIBUTE_CONSTRUCTOR 1 ) +set( EC_ATTRIBUTE_CONSTRUCTOR_INITS_ARGV 0 ) +set( EC_HAVE_PROCFS 1 ) +set( EC_HAVE_DLFCN_H 1 ) +set( EC_HAVE_DLADDR 1 ) +set( EC_HAVE_AIOCB 1 ) +set( EC_HAVE_AIOCB64 1 ) + +# Disable relative rpaths as aprun does not respect it +set( ENABLE_RELATIVE_RPATHS OFF CACHE STRING "Disable relative rpaths" FORCE ) + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) +set( ECBUILD_TRUST_FLAGS ON ) + +#################################################################### +# Compiler FLAGS +#################################################################### + +# General Flags (add to default) + +set(ECBUILD_Fortran_FLAGS "-g") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -qopenmp-threadprivate compat") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume byterecl") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -convert big_endian") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -traceback") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -align array64byte") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -warn nounused,nouncalled") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -march=core-avx2") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-functions") +#set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} 
-finline-limit=1500") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Winline") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -no-fma") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume realloc_lhs") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fp-model precise") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ftz") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fp-speculation=safe") +#set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fast-transcendentals") + +#################################################################### +# LINK FLAGS +#################################################################### + +set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp" ) +set( ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap" ) +set( ECBUILD_EXE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap -Wl,--as-needed" ) +set( ECBUILD_CXX_IMPLICIT_LINK_LIBRARIES "${LIBCRAY_CXX_RTS}" CACHE STRING "" ) diff --git a/arch/cscs/daint/nvidia/6.0.10/env.sh b/arch/cscs/daint/nvidia/6.0.10/env.sh new file mode 100644 index 0000000..888d3ec --- /dev/null +++ b/arch/cscs/daint/nvidia/6.0.10/env.sh @@ -0,0 +1,45 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module purge -f + +# Load modules +module load daint-gpu +module load PrgEnv-nvidia/6.0.10 +module swap nvidia/21.3 nvidia/22.5 +#module load Boost +module load CMake +module load cudatoolkit/11.2.0_3.39-2.1__gf93aa1c # needed for cmake to find hdf5 +module load cray-hdf5-parallel +module load cray-python + +# Increase stack size to maximum +ulimit -S -s unlimited + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" +export BOOST_ROOT=/users/subbiali/boost/1.82.0 diff --git a/arch/cscs/daint/nvidia/6.0.10/toolchain.cmake b/arch/cscs/daint/nvidia/6.0.10/toolchain.cmake new file mode 100644 index 0000000..5ec011b --- /dev/null +++ b/arch/cscs/daint/nvidia/6.0.10/toolchain.cmake @@ -0,0 +1,53 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+# NB: Only OpenACC offload is requested here (no `-mp` in these flags);
+# the GPU target is compute capability 6.0 (cc60)
+set( OpenACC_Fortran_FLAGS "-acc=gpu -gpu=cc60,lineinfo,fastmath" CACHE STRING "" )
+# Enable this to get more detailed compiler output
+# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
+
+####################################################################
+# CUDA FLAGS
+####################################################################
+
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+  set(CMAKE_CUDA_ARCHITECTURES 60)
+endif()
+if(NOT DEFINED CMAKE_CUDA_COMPILER)
+  set(CMAKE_CUDA_COMPILER /opt/nvidia/hpc_sdk/Linux_x86_64/21.3/compilers/bin/nvcc)
+endif()
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+set(ECBUILD_Fortran_FLAGS "-fpic")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz")
+
+set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" )
+
+set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" )
+
+set( ECBUILD_CXX_FLAGS "-O2 -gopt" )
diff --git a/arch/eurohpc/leonardo/nvhpc/23.1/env.sh b/arch/eurohpc/leonardo/nvhpc/23.1/env.sh
new file mode 100644
index 0000000..f1333db
--- /dev/null
+++
b/arch/eurohpc/leonardo/nvhpc/23.1/env.sh @@ -0,0 +1,49 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# NB: This does currently not support the Serialbox-based build modes +# because the available Boost module does not include the boost_filesystem library + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Load modules +module_load nvhpc/23.1 +module_load openmpi/4.1.4--nvhpc--23.1-cuda-11.8 +module_load cmake/3.24.3 +module_load cuda/11.8 +module_load hdf5/1.12.2--openmpi--4.1.4--nvhpc--23.1 +module_load python/3.10.8--gcc--8.5.0 + +export CC=nvc +export CXX=nvc++ +export F77=nvfortran +export FC=nvfortran +export F90=nvfortran + +# Increase stack size to maximum +ulimit -S -s unlimited + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +# Variable no longer required, make sure it is not set +unset ECBUILD_TOOLCHAIN diff --git a/arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake b/arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake new file mode 100644 index 0000000..ce8de9d --- /dev/null +++ b/arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake @@ -0,0 +1,57 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) + +#################################################################### +# OpenMP FLAGS +#################################################################### + +# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module +# unless its stored as a cache variable +set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" ) + +# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to +# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone +# doesn't have any effect here as the module uses FORCE to overwrite the +# existing value) +set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) +set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") + +#################################################################### +# OpenAcc FLAGS +#################################################################### + +# NB: We have to add `-mp` again to avoid undefined symbols during linking +# (smells like an Nvidia bug) +set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" ) +# Enable this to get more detailed compiler output +# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) + +#################################################################### +# COMMON FLAGS +#################################################################### + +set(ECBUILD_Fortran_FLAGS "-fpic") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") 
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") + +set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) + +set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) + +set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) diff --git a/arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh b/arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh new file mode 100644 index 0000000..67eee0b --- /dev/null +++ b/arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh @@ -0,0 +1,44 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load LUMI/22.08 +module_load partition/G +module_load PrgEnv-aocc/8.3.3 +module_load craype-accel-amd-gfx90a +module_load buildtools/22.08 +module_load cray-hdf5/1.12.1.5 +module_load cray-python/3.9.12.1 + +# Specify compilers +export CC=amdclang CXX=amdclang++ FC=amdflang +#export CC=cc CXX=CC FC=ftn + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake b/arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake new file mode 100644 index 0000000..557774a --- 
/dev/null +++ b/arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake @@ -0,0 +1,31 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) + +#################################################################### +# OpenMP FLAGS +#################################################################### + +set( OpenMP_Fortran_FLAGS "-fopenmp --offload-arch=gfx90a" CACHE STRING "" ) + +#################################################################### +# OpenAcc FLAGS +#################################################################### + +set( ENABLE_ACC OFF CACHE STRING "" ) + +#################################################################### +# COMMON FLAGS +#################################################################### + +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fpic -O3") diff --git a/arch/eurohpc/lumi/amd-host/8.3.3/env.sh b/arch/eurohpc/lumi/amd-host/8.3.3/env.sh new file mode 100644 index 0000000..6c3ba71 --- /dev/null +++ b/arch/eurohpc/lumi/amd-host/8.3.3/env.sh @@ -0,0 +1,43 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load LUMI/22.08 +module_load partition/C +module_load PrgEnv-aocc/8.3.3 +module_load craype-accel-host +module_load buildtools/22.08 +module_load cray-hdf5/1.12.1.5 +module_load cray-python/3.9.12.1 + +# Specify compilers +export CC=cc CXX=CC FC=ftn + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake b/arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake new file mode 100644 index 0000000..dac9ed4 --- /dev/null +++ b/arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake @@ -0,0 +1,25 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) + +#################################################################### +# OpenAcc FLAGS +#################################################################### + +set( ENABLE_ACC OFF CACHE STRING "" ) + +#################################################################### +# COMMON FLAGS +#################################################################### + +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fpic -O3") diff --git a/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh b/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh new file mode 100644 index 0000000..2cc0f9b --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh @@ -0,0 +1,49 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load PrgEnv-cray/8.3.3 +module_load LUMI/22.08 +# module_load partition/G +module_load rocm/5.0.2 +module_load cce/14.0.2 +module_load cray-libsci/22.08.1.1 +module_load cray-mpich/8.1.18 +module_load craype/2.7.17 +module_load craype-accel-amd-gfx90a +module_load buildtools/22.08 +module_load cray-hdf5/1.12.1.5 +module_load cray-python/3.9.12.1 + +module list + +set -x + +export CC=cc CXX=CC FC=ftn + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake new file mode 100644 index 0000000..0774cf5 --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake @@ -0,0 +1,42 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI OFF )
+set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+set( ENABLE_OMP ON CACHE STRING "" )
+set( OpenMP_C_FLAGS "-homp" CACHE STRING "" )
+set( OpenMP_Fortran_FLAGS "-homp" CACHE STRING "" )
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+set( ENABLE_ACC ON CACHE STRING "" )
+set( OpenACC_C_FLAGS "-hacc" )
+set( OpenACC_CXX_FLAGS "-hacc" )
+set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" )
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General Flags (add to default); `-Wl,<opt>` must not contain a space after the comma
+set(ECBUILD_Fortran_FLAGS "-hcontiguous")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed")
+
+set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")
diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh
new file mode 100644
index 0000000..9a66e15
--- /dev/null
+++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh
@@ -0,0 +1,51 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+ +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load PrgEnv-cray/8.3.3 +module_load LUMI/23.03 +# module_load partition/G +module_load rocm/5.2.3 +module_load cce/15.0.1 +module_load cray-libsci/22.08.1.1 +module_load cray-mpich/8.1.18 +module_load craype/2.7.20 +module_load craype-accel-amd-gfx90a +module_load buildtools/23.03 +module_load cray-hdf5/1.12.1.5 +module_load cray-python/3.9.12.1 +module_load Boost/1.81.0-cpeCray-23.03 +module_load partition/G + +module list + +set -x + +export CC=cc CXX=CC FC=ftn + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake new file mode 100644 index 0000000..e7e382b --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake @@ -0,0 +1,58 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI OFF )
+set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+set( ENABLE_OMP ON CACHE STRING "" )
+set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" )
+set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" )
+set( OpenMP_Fortran_FLAGS "-fopenmp -hnoacc -hlist=aimd" CACHE STRING "" )
+
+set( OpenMP_C_LIB_NAMES "craymp" )
+set( OpenMP_CXX_LIB_NAMES "craymp" )
+set( OpenMP_Fortran_LIB_NAMES "craymp" )
+set( OpenMP_craymp_LIBRARY "/opt/cray/pe/cce/15.0.1/cce/x86_64/lib/libcraymp.so" )
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+set( ENABLE_ACC ON CACHE STRING "" )
+set( OpenACC_C_FLAGS "-hacc" )
+set( OpenACC_CXX_FLAGS "-hacc" )
+set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" )
+
+####################################################################
+# HIP FLAGS
+####################################################################
+
+set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -O3 -ffast-math")
+if(NOT DEFINED CMAKE_HIP_ARCHITECTURES)
+  set(CMAKE_HIP_ARCHITECTURES gfx90a)
+endif()
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General Flags (add to default); `-Wl,<opt>` must not contain a space after the comma
+set(ECBUILD_Fortran_FLAGS "-hcontiguous")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed")
+
+set(ECBUILD_Fortran_FLAGS_BIT "-O3 -G2 -haggress -DNDEBUG")
+# set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")
diff --git
a/arch/eurohpc/lumi/cray-host/14.0.2/env.sh b/arch/eurohpc/lumi/cray-host/14.0.2/env.sh new file mode 100644 index 0000000..afe2ec1 --- /dev/null +++ b/arch/eurohpc/lumi/cray-host/14.0.2/env.sh @@ -0,0 +1,42 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load PrgEnv-cray/8.3.3 +module_load LUMI/22.08 +# module_load craype-x86-milan +module_load craype-accel-host +module_load buildtools/22.08 +module_load cray-hdf5/1.12.1.5 +module_load cray-python/3.9.12.1 + +module list + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake b/arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake new file mode 100644 index 0000000..638b81f --- /dev/null +++ b/arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake @@ -0,0 +1,35 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) +set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) + +#################################################################### +# OpenACC FLAGS +#################################################################### + +set( ENABLE_ACC OFF CACHE STRING "" ) +set( OpenACC_C_FLAGS "-hnoacc" ) +set( OpenACC_CXX_FLAGS "-hnoacc" ) +set( OpenACC_Fortran_FLAGS "-hnoacc" ) + +#################################################################### +# Compiler FLAGS +#################################################################### + +# General Flags (add to default) +set(ECBUILD_Fortran_FLAGS "-hcontiguous") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed") + +set(ECBUILD_Fortran_FLAGS_BIT "-emf -N 1023 -O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") diff --git a/arch/eurohpc/meluxina/nvhpc/21.11/env.sh b/arch/eurohpc/meluxina/nvhpc/21.11/env.sh new file mode 100644 index 0000000..d0253d8 --- /dev/null +++ b/arch/eurohpc/meluxina/nvhpc/21.11/env.sh @@ -0,0 +1,58 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module_unload ParaStationMPI +module_unload NVHPC +module_unload gompi +module_unload HDF5 +module_unload CMake + +# Load modules +module use /apps/USE/easybuild/staging/2022.1/modules/all + +module_load NVHPC/21.11 +module_load ParaStationMPI/5.4.11-1-GCC-10.3.0-CUDA-11.3.1 +module_load CMake/3.23.1 +module_load Boost/1.79.0-GCC-11.3.0 +module_load Python/3.10.4-GCCcore-11.3.0 + +export CC=nvc +export CXX=nvc++ +export F77=nvfortran +export FC=nvfortran +export F90=nvfortran + +export HDF5_ROOT=/mnt/tier2/project/p200061/nvhpc-install + +# Loki install workaround for new editable installs +export SETUPTOOLS_ENABLE_FEATURES="legacy-editable" + +# Increase stack size to maximum +ulimit -S -s unlimited + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake b/arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake new file mode 100644 index 0000000..ce8de9d --- /dev/null +++ b/arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake @@ -0,0 +1,57 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module
+# unless it's stored as a cache variable
+set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" )
+
+# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to
+# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone
+# doesn't have any effect here as the module uses FORCE to overwrite the
+# existing value)
+set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "")
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+# NB: We have to add `-mp` again to avoid undefined symbols during linking
+# (smells like an Nvidia bug)
+set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" )
+# Enable this to get more detailed compiler output
+# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+set(ECBUILD_Fortran_FLAGS "-fpic")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") + +set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) + +set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) + +set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) diff --git a/arch/eurohpc/meluxina/nvhpc/22.3/env.sh b/arch/eurohpc/meluxina/nvhpc/22.3/env.sh new file mode 100644 index 0000000..7a23ace --- /dev/null +++ b/arch/eurohpc/meluxina/nvhpc/22.3/env.sh @@ -0,0 +1,56 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module_unload ParaStationMPI +module_unload NVHPC +module_unload gompi +module_unload HDF5 +module_unload CMake + +# Load modules +module use /apps/USE/easybuild/staging/2021.5/modules/all + +module_load NVHPC/22.3 +module_load ParaStationMPI/5.4.11-1-GCC-10.3.0-CUDA-11.3.1 +module_load CMake/3.20.4 +module_load CUDA/11.3.1 +module_load Boost/1.76.0-GCC-10.3.0 +module_load Python/3.9.5-GCCcore-10.3.0 + +export CC=nvc +export CXX=nvc++ +export F77=nvfortran +export FC=nvfortran +export F90=nvfortran + +export HDF5_ROOT=/mnt/tier2/project/p200061/nvhpc-install + +# Increase stack size to maximum +ulimit -S -s unlimited + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git 
a/arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake b/arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake
new file mode 100644
index 0000000..ce8de9d
--- /dev/null
+++ b/arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake
@@ -0,0 +1,57 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module
+# unless it's stored as a cache variable
+set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" )
+
+# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to
+# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone
+# doesn't have any effect here as the module uses FORCE to overwrite the
+# existing value)
+set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "")
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+# NB: We have to add `-mp` again to avoid undefined symbols during linking
+# (smells like an Nvidia bug)
+set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" )
+# Enable this to get more detailed compiler output
+# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+set(ECBUILD_Fortran_FLAGS "-fpic")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz")
+
+set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" )
+
+set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" )
+
+set( ECBUILD_CXX_FLAGS "-O2 -gopt" )
diff --git a/arch/eurohpc/meluxina/nvhpc/22.7/env.sh b/arch/eurohpc/meluxina/nvhpc/22.7/env.sh
new file mode 100644
index 0000000..326a341
--- /dev/null
+++ b/arch/eurohpc/meluxina/nvhpc/22.7/env.sh
@@ -0,0 +1,52 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+ +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module --force purge + +# Load modules +module_load env/release/2022.1 +module_load CUDA/11.7.0 +module_load NVHPC/22.7-CUDA-11.7.0 +module_load OpenMPI/4.1.4-GCC-11.3.0 +module_load CMake +module_load Boost +module_load Python +#module_load HDF5 + +export CC=nvc +export CXX=nvc++ +export F77=nvfortran +export FC=nvfortran +export F90=nvfortran + +export HDF5_ROOT=/project/home/p200177/nasu/hdf5/1.14.1-2/build/release/2022.1/nvhpc/22.7/ + +# Increase stack size to maximum +ulimit -S -s unlimited + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/meluxina/nvhpc/22.7/toolchain.cmake b/arch/eurohpc/meluxina/nvhpc/22.7/toolchain.cmake new file mode 100644 index 0000000..ce8de9d --- /dev/null +++ b/arch/eurohpc/meluxina/nvhpc/22.7/toolchain.cmake @@ -0,0 +1,57 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module
+# unless it's stored as a cache variable
+set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" )
+
+# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to
+# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone
+# doesn't have any effect here as the module uses FORCE to overwrite the
+# existing value)
+set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "")
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+# NB: We have to add `-mp` again to avoid undefined symbols during linking
+# (smells like an Nvidia bug)
+set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" )
+# Enable this to get more detailed compiler output
+# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+set(ECBUILD_Fortran_FLAGS "-fpic")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") + +set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) + +set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) + +set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) From 091eb3d07f1c5ce3ee4c5ba2b94aea85969dfa05 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Mon, 8 Jan 2024 09:46:10 +0100 Subject: [PATCH 02/15] Enable single precision. --- CMakeLists.txt | 10 ++++++++++ bundle.yml | 4 ++++ src/common/module/expand_mod.F90 | 30 +++++++++++++++--------------- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eb26466..98fbefd 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,8 +73,18 @@ if( HAVE_HDF5 ) list(APPEND CLOUDSC_DEFINITIONS HAVE_HDF5 ) endif() + +### Loki ecbuild_find_package( NAME loki ) +# Add option for single-precision builds +ecbuild_add_option( FEATURE SINGLE_PRECISION + DESCRIPTION "Build CLOUDSC in single precision" DEFAULT OFF +) +if( HAVE_SINGLE_PRECISION ) + list(APPEND CLOUDSC_DEFINITIONS SINGLE) +endif() + # build executables add_subdirectory(src) diff --git a/bundle.yml b/bundle.yml index ab14ad1..bfaf515 100755 --- a/bundle.yml +++ b/bundle.yml @@ -35,6 +35,10 @@ options : help : Specify compiler options via supplied toolchain file cmake : CMAKE_TOOLCHAIN_FILE={{value}} + - single-precision : + help : Enable single precision build of the dwarf + cmake : ENABLE_SINGLE_PRECISION=ON + - with-loki : help : Enable Loki/CLAW source-to-source transformations cmake : > diff --git a/src/common/module/expand_mod.F90 b/src/common/module/expand_mod.F90 index 236dbe8..c647bf8 100644 --- a/src/common/module/expand_mod.F90 +++ b/src/common/module/expand_mod.F90 @@ -8,7 +8,7 @@ ! nor does it submit to any jurisdiction. ! 
module expand_mod - USE PARKIND1 , ONLY : JPIM, JPRB, JPRD + USE PARKIND1 , ONLY : JPIM, JPRB USE YOMPHYDER, ONLY : STATE_TYPE use cloudsc_mpi_mod, only : irank, numproc @@ -83,10 +83,10 @@ end subroutine load_and_expand_l1 subroutine load_and_expand_r1(name, field, nlon, nproma, ngptot, nblocks, ngptotg) ! Load into the local memory buffer and expand to global field character(len=*) :: name - real(kind=JPRB), allocatable, intent(inout) :: field(:,:) + real(kind=jprb), allocatable, intent(inout) :: field(:,:) integer(kind=jpim), intent(in) :: nlon, nproma, ngptot, nblocks integer(kind=jpim), intent(in), optional :: ngptotg - real(kind=jprd), allocatable :: buffer(:), rbuf(:) + real(kind=jprb), allocatable :: buffer(:), rbuf(:) integer(kind=jpim) :: start, end, size call get_offsets(start, end, size, nlon, 1, 1, ngptot, ngptotg) @@ -100,10 +100,10 @@ end subroutine load_and_expand_r1 subroutine load_and_expand_r2(name, field, nlon, nlev, nproma, ngptot, nblocks, ngptotg) ! Load into the local memory buffer and expand to global field character(len=*) :: name - real(kind=JPRB), allocatable, intent(inout) :: field(:,:,:) + real(kind=jprb), allocatable, intent(inout) :: field(:,:,:) integer(kind=jpim), intent(in) :: nlon, nlev, nproma, ngptot, nblocks integer(kind=jpim), intent(in), optional :: ngptotg - real(kind=jprd), allocatable :: buffer(:,:), rbuf(:,:) + real(kind=jprb), allocatable :: buffer(:,:), rbuf(:,:) integer(kind=jpim) :: start, end, size call get_offsets(start, end, size, nlon, 1, nlev, ngptot, ngptotg) @@ -117,10 +117,10 @@ end subroutine load_and_expand_r2 subroutine load_and_expand_r3(name, field, nlon, nlev, ndim, nproma, ngptot, nblocks, ngptotg) ! 
Load into the local memory buffer and expand to global field character(len=*) :: name - real(kind=JPRB), allocatable, intent(inout) :: field(:,:,:,:) + real(kind=jprb), allocatable, intent(inout) :: field(:,:,:,:) integer(kind=jpim), intent(in) :: nlon, nlev, ndim, nproma, ngptot, nblocks integer(kind=jpim), intent(in), optional :: ngptotg - real(kind=jprd), allocatable :: buffer(:,:,:), rbuf(:,:,:) + real(kind=jprb), allocatable :: buffer(:,:,:), rbuf(:,:,:) integer(kind=jpim) :: start, end, size call get_offsets(start, end, size, nlon, ndim, nlev, ngptot, ngptotg) @@ -135,10 +135,10 @@ subroutine load_and_expand_state(name, state, field, nlon, nlev, ndim, nproma, n ! Load into the local memory buffer and expand to global field character(len=*) :: name type(state_type), allocatable, intent(inout) :: state(:) - real(kind=JPRB), allocatable, target, intent(inout) :: field(:,:,:,:) + real(kind=jprb), allocatable, target, intent(inout) :: field(:,:,:,:) integer(kind=jpim), intent(in) :: nlon, nlev, ndim, nproma, ngptot, nblocks integer(kind=jpim), intent(in), optional :: ngptotg - real(kind=jprd), allocatable :: buffer(:,:,:), rbuf(:,:,:) + real(kind=jprb), allocatable :: buffer(:,:,:), rbuf(:,:,:) integer(kind=jpim) :: start, end, size integer :: b @@ -235,8 +235,8 @@ subroutine expand_i1(buffer, field, nlon, nproma, ngptot, nblocks) end subroutine expand_i1 subroutine expand_r1(buffer, field, nlon, nproma, ngptot, nblocks) - real(kind=JPRD), intent(inout) :: buffer(nlon) - real(kind=JPRB), intent(inout) :: field(nproma, nblocks) + real(kind=jprb), intent(inout) :: buffer(nlon) + real(kind=jprb), intent(inout) :: field(nproma, nblocks) integer(kind=jpim), intent(in) :: nlon, nproma, ngptot, nblocks integer :: b, gidx, bsize, fidx, fend, bidx, bend @@ -269,8 +269,8 @@ end subroutine expand_r1 subroutine expand_r2(buffer, field, nlon, nproma, nlev, ngptot, nblocks) use omp_lib - real(kind=JPRD), intent(inout) :: buffer(nlon, nlev) - real(kind=JPRB), intent(inout) :: 
field(nproma, nlev, nblocks) + real(kind=jprb), intent(inout) :: buffer(nlon, nlev) + real(kind=jprb), intent(inout) :: field(nproma, nlev, nblocks) integer(kind=jpim), intent(in) :: nlon, nlev, nproma, ngptot, nblocks integer :: b, gidx, bsize, fidx, fend, bidx, bend @@ -302,8 +302,8 @@ subroutine expand_r2(buffer, field, nlon, nproma, nlev, ngptot, nblocks) end subroutine expand_r2 subroutine expand_r3(buffer, field, nlon, nproma, nlev, ndim, ngptot, nblocks) - real(kind=JPRD), intent(inout) :: buffer(nlon, nlev, ndim) - real(kind=JPRB), intent(inout) :: field(nproma, nlev, ndim, nblocks) + real(kind=jprb), intent(inout) :: buffer(nlon, nlev, ndim) + real(kind=jprb), intent(inout) :: field(nproma, nlev, ndim, nblocks) integer(kind=jpim), intent(in) :: nlon, nlev, ndim, nproma, ngptot, nblocks integer :: b, gidx, bsize, fidx, fend, bidx, bend From 7a10aa8bd3d6b6c72d4f708e85fc8d183ea94037 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Mon, 8 Jan 2024 09:46:25 +0100 Subject: [PATCH 03/15] Fix git repo. --- cloudsc-bundle | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cloudsc-bundle b/cloudsc-bundle index 771ac48..8e8ccbe 100755 --- a/cloudsc-bundle +++ b/cloudsc-bundle @@ -25,15 +25,11 @@ shift BUNDLE_DIR="$( cd $( dirname "${BASH_SOURCE[0]}" ) && pwd -P )" -if [[ -z "${BITBUCKET}" ]]; then - export BITBUCKET=ssh://git@git.ecmwf.int -fi - # Download ecbundle scripts if not already available command_exists () { type "$1" &> /dev/null ; } if ! command_exists ${BOOTSTRAPPED} ; then if [[ ! 
-d ${BUNDLE_DIR}/ecbundle ]]; then - git clone ${BITBUCKET}/escape/ecbundle.git ${BUNDLE_DIR}/ecbundle + git clone https://github.com/ecmwf/ecbundle.git ${BUNDLE_DIR}/ecbundle ( cd ${BUNDLE_DIR}/ecbundle && git checkout ${ecbundle_VERSION} ) fi export PATH=${BUNDLE_DIR}/ecbundle/bin:${PATH} From d33cd54881667d3215660a503fc9d960b517898d Mon Sep 17 00:00:00 2001 From: stubbiali Date: Mon, 8 Jan 2024 09:46:42 +0100 Subject: [PATCH 04/15] Disable validation for benchmarking. --- src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 | 104 +++++++++--------- src/cloudsc2_nl/dwarf_cloudsc.F90 | 2 +- src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 | 122 +++++++++++----------- 3 files changed, 114 insertions(+), 114 deletions(-) diff --git a/src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 b/src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 index 38d3bdd..10d9f39 100644 --- a/src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 +++ b/src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 @@ -194,19 +194,19 @@ SUBROUTINE CLOUDSC_DRIVER_AD( & & ZFHPSL , ZFHPSN , ZCOVPTOT, & & YDCST, YDTHF, YHNC, YPHLI, YCLD, YCLDP, YNCL ) ! o - ! First norm - DO JROF=1,ICEND - ZNORM1(JROF)=SUM(ZTENO_T(JROF,1:NLEV)*ZTENO_T(JROF,1:NLEV)) & - & + SUM(ZTENO_Q(JROF,1:NLEV)*ZTENO_Q(JROF,1:NLEV)) & - & + SUM(ZTENO_L(JROF,1:NLEV)*ZTENO_L(JROF,1:NLEV)) & - & + SUM(ZTENO_I(JROF,1:NLEV)*ZTENO_I(JROF,1:NLEV)) & - & + SUM(ZCLC(JROF,1:NLEV)*ZCLC(JROF,1:NLEV)) & - & + SUM(ZFPLSL(JROF,1:NLEV+1)*ZFPLSL(JROF,1:NLEV+1)) & - & + SUM(ZFPLSN(JROF,1:NLEV+1)*ZFPLSN(JROF,1:NLEV+1)) & - & + SUM(ZFHPSL(JROF,1:NLEV+1)*ZFHPSL(JROF,1:NLEV+1)) & - & + SUM(ZFHPSN(JROF,1:NLEV+1)*ZFHPSN(JROF,1:NLEV+1)) & - & + SUM(ZCOVPTOT(JROF,1:NLEV)*ZCOVPTOT(JROF,1:NLEV)) - ENDDO +! ! First norm +! DO JROF=1,ICEND +! ZNORM1(JROF)=SUM(ZTENO_T(JROF,1:NLEV)*ZTENO_T(JROF,1:NLEV)) & +! & + SUM(ZTENO_Q(JROF,1:NLEV)*ZTENO_Q(JROF,1:NLEV)) & +! & + SUM(ZTENO_L(JROF,1:NLEV)*ZTENO_L(JROF,1:NLEV)) & +! & + SUM(ZTENO_I(JROF,1:NLEV)*ZTENO_I(JROF,1:NLEV)) & +! & + SUM(ZCLC(JROF,1:NLEV)*ZCLC(JROF,1:NLEV)) & +! 
& + SUM(ZFPLSL(JROF,1:NLEV+1)*ZFPLSL(JROF,1:NLEV+1)) & +! & + SUM(ZFPLSN(JROF,1:NLEV+1)*ZFPLSN(JROF,1:NLEV+1)) & +! & + SUM(ZFHPSL(JROF,1:NLEV+1)*ZFHPSL(JROF,1:NLEV+1)) & +! & + SUM(ZFHPSN(JROF,1:NLEV+1)*ZFHPSN(JROF,1:NLEV+1)) & +! & + SUM(ZCOVPTOT(JROF,1:NLEV)*ZCOVPTOT(JROF,1:NLEV)) +! ENDDO ! Initiaslization of output variables ZAPH = 0.0_JPRB @@ -251,35 +251,35 @@ SUBROUTINE CLOUDSC_DRIVER_AD( & & ZFHPSL , ZFHPSN , ZCOVPTOT, & & YDCST, YDTHF, YHNC, YPHLI, YCLD, YCLDP, YNCL) ! o - ! Second norm - DO JROF=1,ICEND - ZNORM2(JROF)=SUM(ZAPH0(JROF,1:NLEV+1)*ZAPH(JROF,1:NLEV+1)) & - & + SUM(ZAP0(JROF,1:NLEV)*ZAP(JROF,1:NLEV)) & - & + SUM(ZQ0(JROF,1:NLEV)*ZQ(JROF,1:NLEV)) & - & + SUM(ZZQSAT0(JROF,1:NLEV)*ZZQSAT(JROF,1:NLEV)) & - & + SUM(ZT0(JROF,1:NLEV)*ZT(JROF,1:NLEV)) & - & + SUM(ZL0(JROF,1:NLEV)*ZL(JROF,1:NLEV)) & - & + SUM(ZI0(JROF,1:NLEV)*ZI(JROF,1:NLEV)) & - & + SUM(ZLUDE0(JROF,1:NLEV)*ZLUDE(JROF,1:NLEV)) & - & + SUM(ZLU0(JROF,1:NLEV)*ZLU(JROF,1:NLEV)) & - & + SUM(ZMFU0(JROF,1:NLEV)*ZMFU(JROF,1:NLEV)) & - & + SUM(ZMFD0(JROF,1:NLEV)*ZMFD(JROF,1:NLEV)) & - & + SUM(ZTENI_T0(JROF,1:NLEV)*ZTENI_T(JROF,1:NLEV)) & - & + SUM(ZTENI_Q0(JROF,1:NLEV)*ZTENI_Q(JROF,1:NLEV)) & - & + SUM(ZTENI_L0(JROF,1:NLEV)*ZTENI_L(JROF,1:NLEV)) & - & + SUM(ZTENI_I0(JROF,1:NLEV)*ZTENI_I(JROF,1:NLEV)) & - & + SUM(ZSUPSAT0(JROF,1:NLEV)*ZSUPSAT(JROF,1:NLEV)) - ! Third norm - ! Note the machine precision is defined here as strictly 64bits - ! as we assume at worst 12 digits agreements in norms. - IF (ZNORM2(JROF) == 0._JPRB ) THEN - ZNORM3(JROF)=ABS(ZNORM1(JROF)-ZNORM2(JROF))/EPSILON(1._8) - ELSE - ZNORM3(JROF)=ABS(ZNORM1(JROF)-ZNORM2(JROF))/EPSILON(1._8)/ZNORM2(JROF) - ENDIF - ENDDO - - ZNORMG=MAX(ZNORMG,MAXVAL(ZNORM3(1:ICEND))) +! ! Second norm +! DO JROF=1,ICEND +! ZNORM2(JROF)=SUM(ZAPH0(JROF,1:NLEV+1)*ZAPH(JROF,1:NLEV+1)) & +! & + SUM(ZAP0(JROF,1:NLEV)*ZAP(JROF,1:NLEV)) & +! & + SUM(ZQ0(JROF,1:NLEV)*ZQ(JROF,1:NLEV)) & +! & + SUM(ZZQSAT0(JROF,1:NLEV)*ZZQSAT(JROF,1:NLEV)) & +! 
& + SUM(ZT0(JROF,1:NLEV)*ZT(JROF,1:NLEV)) & +! & + SUM(ZL0(JROF,1:NLEV)*ZL(JROF,1:NLEV)) & +! & + SUM(ZI0(JROF,1:NLEV)*ZI(JROF,1:NLEV)) & +! & + SUM(ZLUDE0(JROF,1:NLEV)*ZLUDE(JROF,1:NLEV)) & +! & + SUM(ZLU0(JROF,1:NLEV)*ZLU(JROF,1:NLEV)) & +! & + SUM(ZMFU0(JROF,1:NLEV)*ZMFU(JROF,1:NLEV)) & +! & + SUM(ZMFD0(JROF,1:NLEV)*ZMFD(JROF,1:NLEV)) & +! & + SUM(ZTENI_T0(JROF,1:NLEV)*ZTENI_T(JROF,1:NLEV)) & +! & + SUM(ZTENI_Q0(JROF,1:NLEV)*ZTENI_Q(JROF,1:NLEV)) & +! & + SUM(ZTENI_L0(JROF,1:NLEV)*ZTENI_L(JROF,1:NLEV)) & +! & + SUM(ZTENI_I0(JROF,1:NLEV)*ZTENI_I(JROF,1:NLEV)) & +! & + SUM(ZSUPSAT0(JROF,1:NLEV)*ZSUPSAT(JROF,1:NLEV)) +! ! Third norm +! ! Note the machine precision is defined here as strictly 64bits +! ! as we assume at worst 12 digits agreements in norms. +! IF (ZNORM2(JROF) == 0._JPRB ) THEN +! ZNORM3(JROF)=ABS(ZNORM1(JROF)-ZNORM2(JROF))/EPSILON(1._8) +! ELSE +! ZNORM3(JROF)=ABS(ZNORM1(JROF)-ZNORM2(JROF))/EPSILON(1._8)/ZNORM2(JROF) +! ENDIF +! ENDDO +! +! ZNORMG=MAX(ZNORMG,MAXVAL(ZNORM3(1:ICEND))) ! Log number of columns processed by this thread CALL TIMER%THREAD_LOG(TID, IGPC=ICEND) @@ -297,16 +297,16 @@ SUBROUTINE CLOUDSC_DRIVER_AD( & CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, ZHPM, NGPTOT) - ! Print final test results - print *, ' AD TEST ' - print *, ' The maximum error is ',ZNORMG,' times the zero of the machine. ' - print *, ' ============================= ' - IF (ZNORMG < 10000._JPRB) THEN - print *, ' = TEST OK = ' - ELSE - print *, ' = TEST FAILED = ' - ENDIF - print *, ' ============================= ' +! ! Print final test results +! print *, ' AD TEST ' +! print *, ' The maximum error is ',ZNORMG,' times the zero of the machine. ' +! print *, ' ============================= ' +! IF (ZNORMG < 10000._JPRB) THEN +! print *, ' = TEST OK = ' +! ELSE +! print *, ' = TEST FAILED = ' +! ENDIF +! 
print *, ' ============================= ' END SUBROUTINE CLOUDSC_DRIVER_AD diff --git a/src/cloudsc2_nl/dwarf_cloudsc.F90 b/src/cloudsc2_nl/dwarf_cloudsc.F90 index 7dcff77..89f1a41 100644 --- a/src/cloudsc2_nl/dwarf_cloudsc.F90 +++ b/src/cloudsc2_nl/dwarf_cloudsc.F90 @@ -122,7 +122,7 @@ PROGRAM DWARF_CLOUDSC & YRCST, YRTHF, YRPHNC, YREPHLI, YRECLD, YRECLDP) ! Validate the output against serialized reference data -CALL GLOBAL_STATE%VALIDATE(NPROMA, NGPTOT, NGPTOTG) +!CALL GLOBAL_STATE%VALIDATE(NPROMA, NGPTOT, NGPTOTG) IF (WRITE_REFERENCE == '1') THEN CALL GLOBAL_STATE%WRITE_REFERENCE(NPROMA) diff --git a/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 b/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 index bf9e350..4293173 100644 --- a/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 +++ b/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 @@ -248,27 +248,27 @@ SUBROUTINE CLOUDSC_DRIVER_TL( & & PFHPSL5(:,:), PFHPSN5(:,:), PCOVPTOT5(:,:), & & YDCST, YDTHF, YHNC, YPHLI, YCLD, YCLDP) - ! Compute final test norm - ZCOUNT=0._JPRB - ZNORM= 0._JPRB - CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%T, ZTENO_T5, ZTENO_T, ZNORM, ZCOUNT, ZLAMBDA) - CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%Q, ZTENO_Q5, ZTENO_Q, ZNORM, ZCOUNT, ZLAMBDA) - CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%CLD(:,:,NCLDQL), ZTENO_L5, ZTENO_L, ZNORM, ZCOUNT, ZLAMBDA) - CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%CLD(:,:,NCLDQI), ZTENO_I5, ZTENO_I, ZNORM, ZCOUNT, ZLAMBDA) - CALL ERROR_NORM(ICEND, PA(:,:,IBL), PA5, ZCLC, ZNORM, ZCOUNT, ZLAMBDA) - CALL ERROR_NORM(ICEND, PFPLSL(:,:,IBL), PFPLSL5, ZFPLSL, ZNORM, ZCOUNT, ZLAMBDA) - CALL ERROR_NORM(ICEND, PFPLSN(:,:,IBL), PFPLSN5, ZFPLSN, ZNORM, ZCOUNT, ZLAMBDA) - CALL ERROR_NORM(ICEND, PFHPSL(:,:,IBL), PFHPSL5, ZFHPSL, ZNORM, ZCOUNT, ZLAMBDA) - CALL ERROR_NORM(ICEND, PFHPSN(:,:,IBL), PFHPSN5, ZFHPSN, ZNORM, ZCOUNT, ZLAMBDA) - CALL ERROR_NORM(ICEND, PCOVPTOT(:,:,IBL), PCOVPTOT5, ZCOVPTOT, ZNORM, ZCOUNT, ZLAMBDA) - - ! Global norm (normalize by number of active statistics) - IF (ZNORM == 0._JPRB .OR. 
ZCOUNT == 0._JPRB) THEN - print *, ' TL is totally wrong !!! ',ZNORM,ZCOUNT - stop - ELSE - ZNORMG(ILAM)=MAX(ZNORMG(ILAM),ZNORM/ZCOUNT) - ENDIF +! ! Compute final test norm +! ZCOUNT=0._JPRB +! ZNORM= 0._JPRB +! CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%T, ZTENO_T5, ZTENO_T, ZNORM, ZCOUNT, ZLAMBDA) +! CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%Q, ZTENO_Q5, ZTENO_Q, ZNORM, ZCOUNT, ZLAMBDA) +! CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%CLD(:,:,NCLDQL), ZTENO_L5, ZTENO_L, ZNORM, ZCOUNT, ZLAMBDA) +! CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%CLD(:,:,NCLDQI), ZTENO_I5, ZTENO_I, ZNORM, ZCOUNT, ZLAMBDA) +! CALL ERROR_NORM(ICEND, PA(:,:,IBL), PA5, ZCLC, ZNORM, ZCOUNT, ZLAMBDA) +! CALL ERROR_NORM(ICEND, PFPLSL(:,:,IBL), PFPLSL5, ZFPLSL, ZNORM, ZCOUNT, ZLAMBDA) +! CALL ERROR_NORM(ICEND, PFPLSN(:,:,IBL), PFPLSN5, ZFPLSN, ZNORM, ZCOUNT, ZLAMBDA) +! CALL ERROR_NORM(ICEND, PFHPSL(:,:,IBL), PFHPSL5, ZFHPSL, ZNORM, ZCOUNT, ZLAMBDA) +! CALL ERROR_NORM(ICEND, PFHPSN(:,:,IBL), PFHPSN5, ZFHPSN, ZNORM, ZCOUNT, ZLAMBDA) +! CALL ERROR_NORM(ICEND, PCOVPTOT(:,:,IBL), PCOVPTOT5, ZCOVPTOT, ZNORM, ZCOUNT, ZLAMBDA) +! +! ! Global norm (normalize by number of active statistics) +! IF (ZNORM == 0._JPRB .OR. ZCOUNT == 0._JPRB) THEN +! print *, ' TL is totally wrong !!! ',ZNORM,ZCOUNT +! stop +! ELSE +! ZNORMG(ILAM)=MAX(ZNORMG(ILAM),ZNORM/ZCOUNT) +! ENDIF ENDDO ! end of lambda loops @@ -288,46 +288,46 @@ SUBROUTINE CLOUDSC_DRIVER_TL( & CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, ZHPM, NGPTOT) - ! Evaluate the test and print the otput - print *, ' TL Taylor test ' - print *, ' Lambda Result' - istart=0 - DO ILAM=1,10 - print *, ILAM, ZNORMG(ILAM) - ! Redefine ZNORMG - ZNORMG(ILAM)=ABS(1._JPRB - ZNORMG(ILAM)) - ! filter out first members with strong NL departures - if (istart == 0 .AND. ZNORMG(ILAM) < 0.5_JPRB ) istart=ILAM - ENDDO - - print *, ' ============================================== ' - IF (ISTART == 0 .OR. ISTART > 4 ) THEN - print *, ' TEST FAILLED, err 13 ' - ELSE - ! 
V-shape test - ITEST=-10 - INEGAT=1 - DO ILAM=ISTART,10-1 - IF (ZNORMG(ILAM+1)/ZNORMG(ILAM) < 1._JPRB ) THEN - ITEMPNEGAT = 1 - ELSE - ITEMPNEGAT = 0 - ENDIF - IF (INEGAT > ITEMPNEGAT) ITEST=ITEST+10 - INEGAT=ITEMPNEGAT - ENDDO - IF (ITEST == -10) ITEST = 11 ! no change of sign at all - ! Accuracy test - IF (MINVAL(ZNORMG(ISTART:10)) > 0.00001_JPRB) ITEST=ITEST+7 ! Hard limit - IF (MINVAL(ZNORMG(ISTART:10)) > 0.000001_JPRB) ITEST=ITEST+5 ! Soft limit - ! Final prints - IF (ITEST > 5) THEN - print *, ' TEST FAILLED, err ',ITEST - ELSE - print *, ' TEST PASSED, penalty ',ITEST - ENDIF - ENDIF - print *, ' ============================================== ' +! ! Evaluate the test and print the otput +! print *, ' TL Taylor test ' +! print *, ' Lambda Result' +! istart=0 +! DO ILAM=1,10 +! print *, ILAM, ZNORMG(ILAM) +! ! Redefine ZNORMG +! ZNORMG(ILAM)=ABS(1._JPRB - ZNORMG(ILAM)) +! ! filter out first members with strong NL departures +! if (istart == 0 .AND. ZNORMG(ILAM) < 0.5_JPRB ) istart=ILAM +! ENDDO +! +! print *, ' ============================================== ' +! IF (ISTART == 0 .OR. ISTART > 4 ) THEN +! print *, ' TEST FAILLED, err 13 ' +! ELSE +! ! V-shape test +! ITEST=-10 +! INEGAT=1 +! DO ILAM=ISTART,10-1 +! IF (ZNORMG(ILAM+1)/ZNORMG(ILAM) < 1._JPRB ) THEN +! ITEMPNEGAT = 1 +! ELSE +! ITEMPNEGAT = 0 +! ENDIF +! IF (INEGAT > ITEMPNEGAT) ITEST=ITEST+10 +! INEGAT=ITEMPNEGAT +! ENDDO +! IF (ITEST == -10) ITEST = 11 ! no change of sign at all +! ! Accuracy test +! IF (MINVAL(ZNORMG(ISTART:10)) > 0.00001_JPRB) ITEST=ITEST+7 ! Hard limit +! IF (MINVAL(ZNORMG(ISTART:10)) > 0.000001_JPRB) ITEST=ITEST+5 ! Soft limit +! ! Final prints +! IF (ITEST > 5) THEN +! print *, ' TEST FAILLED, err ',ITEST +! ELSE +! print *, ' TEST PASSED, penalty ',ITEST +! ENDIF +! ENDIF +! 
print *, ' ============================================== ' END SUBROUTINE CLOUDSC_DRIVER_TL From b21807f5bc480f797629019074df5a4c43b1b491 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Mon, 8 Jan 2024 09:47:05 +0100 Subject: [PATCH 05/15] Upgrade to ecbuild 3.8.0. --- bundle.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bundle.yml b/bundle.yml index bfaf515..e21e513 100755 --- a/bundle.yml +++ b/bundle.yml @@ -5,12 +5,13 @@ name : cloudsc-bundle version : 1.0.0-develop cmake : > CMAKE_LINK_DEPENDS_NO_SHARED=ON + ENABLE_OMP=ON projects : - ecbuild : git : https://github.com/ecmwf/ecbuild - version : 3.7.0 + version : 3.8.0 bundle : false - loki : From 39ed4f2d9a83b661b990d8ff848bf2a7bd0b8747 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Mon, 8 Jan 2024 09:47:24 +0100 Subject: [PATCH 06/15] Add build scripts for Daint, MLux and LUMI. --- build_daint.sh | 27 +++++++++++++++++++++++++++ build_lumi.sh | 27 +++++++++++++++++++++++++++ build_mlux.sh | 27 +++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 build_daint.sh create mode 100644 build_lumi.sh create mode 100644 build_mlux.sh diff --git a/build_daint.sh b/build_daint.sh new file mode 100644 index 0000000..24d9a0b --- /dev/null +++ b/build_daint.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +./cloudsc-bundle build \ + --arch=arch/cscs/daint/intel/6.0.10 \ + --build-dir=build/intel/6.0.10/release/double \ + --build-type=release \ + --clean + +./cloudsc-bundle build \ + --arch=arch/cscs/daint/intel/6.0.10 \ + --build-dir=build/intel/6.0.10/release/single \ + --build-type=release \ + --single-precision \ + --clean + +./cloudsc-bundle build \ + --arch=arch/cscs/daint/intel/6.0.10 \ + --build-dir=build/intel/6.0.10/bit/double \ + --build-type=bit \ + --clean + +./cloudsc-bundle build \ + --arch=arch/cscs/daint/intel/6.0.10 \ + --build-dir=build/intel/6.0.10/bit/single \ + --build-type=bit \ + --single-precision \ + --clean diff --git a/build_lumi.sh b/build_lumi.sh new file 
mode 100644 index 0000000..8593eb2 --- /dev/null +++ b/build_lumi.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +./cloudsc-bundle build \ + --arch=arch/eurohpc/lumi/cray-gpu/14.0.2 \ + --build-dir=build/cray-gpu/14.0.2/release/double \ + --build-type=release \ + --clean + +./cloudsc-bundle build \ + --arch=arch/eurohpc/lumi/cray-gpu/14.0.2 \ + --build-dir=build/cray-gpu/14.0.2/release/single \ + --build-type=release \ + --single-precision \ + --clean + +./cloudsc-bundle build \ + --arch=arch/eurohpc/lumi/cray-gpu/14.0.2 \ + --build-dir=build/cray-gpu/14.0.2/bit/double \ + --build-type=bit \ + --clean + +./cloudsc-bundle build \ + --arch=arch/eurohpc/lumi/cray-gpu/14.0.2 \ + --build-dir=build/cray-gpu/14.0.2/bit/single \ + --build-type=bit \ + --single-precision \ + --clean diff --git a/build_mlux.sh b/build_mlux.sh new file mode 100644 index 0000000..e861680 --- /dev/null +++ b/build_mlux.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +srun --cpus-per-task=128 ./cloudsc-bundle build \ + --arch=arch/eurohpc/meluxina/nvhpc/22.7 \ + --build-dir=build/nvhpc/22.7/release/double \ + --build-type=release +# --clean + +srun --cpus-per-task=128 ./cloudsc-bundle build \ + --arch=arch/eurohpc/meluxina/nvhpc/22.7 \ + --build-dir=build/nvhpc/22.7/release/single \ + --build-type=release \ + --single-precision +# --clean + +srun --cpus-per-task=128 ./cloudsc-bundle build \ + --arch=arch/eurohpc/meluxina/nvhpc/22.7 \ + --build-dir=build/nvhpc/22.7/bit/double \ + --build-type=bit +# --clean + +srun --cpus-per-task=128 ./cloudsc-bundle build \ + --arch=arch/eurohpc/meluxina/nvhpc/22.7 \ + --build-dir=build/nvhpc/22.7/bit/single \ + --build-type=bit \ + --single-precision +# --clean From c8b30ac7c92583bc46e85cb4ad36bf2365d73810 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 10 Jan 2024 16:49:08 +0100 Subject: [PATCH 07/15] Remove -noacc from OpenMP flags. 
--- arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake index e7e382b..d9c08be 100644 --- a/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake +++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake @@ -20,7 +20,7 @@ set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) set( ENABLE_OMP ON CACHE STRING "" ) set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" ) set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" ) -set( OpenMP_Fortran_FLAGS "-fopenmp -hnoacc -hlist=aimd" CACHE STRING "" ) +set( OpenMP_Fortran_FLAGS "-fopenmp -hlist=aimd" CACHE STRING "" ) set( OpenMP_C_LIB_NAMES "craymp" ) set( OpenMP_CXX_LIB_NAMES "craymp" ) From 11c938ac8e29aad55de1ee4da269efa1e61871f1 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 10 Jan 2024 18:22:31 +0100 Subject: [PATCH 08/15] Remove build scripts from repo. --- build_daint.sh | 27 --------------------------- build_lumi.sh | 27 --------------------------- build_mlux.sh | 27 --------------------------- 3 files changed, 81 deletions(-) delete mode 100644 build_daint.sh delete mode 100644 build_lumi.sh delete mode 100644 build_mlux.sh diff --git a/build_daint.sh b/build_daint.sh deleted file mode 100644 index 24d9a0b..0000000 --- a/build_daint.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -./cloudsc-bundle build \ - --arch=arch/cscs/daint/intel/6.0.10 \ - --build-dir=build/intel/6.0.10/release/double \ - --build-type=release \ - --clean - -./cloudsc-bundle build \ - --arch=arch/cscs/daint/intel/6.0.10 \ - --build-dir=build/intel/6.0.10/release/single \ - --build-type=release \ - --single-precision \ - --clean - -./cloudsc-bundle build \ - --arch=arch/cscs/daint/intel/6.0.10 \ - --build-dir=build/intel/6.0.10/bit/double \ - --build-type=bit \ - --clean - -./cloudsc-bundle build \ - --arch=arch/cscs/daint/intel/6.0.10 \ - --build-dir=build/intel/6.0.10/bit/single \ - 
--build-type=bit \ - --single-precision \ - --clean diff --git a/build_lumi.sh b/build_lumi.sh deleted file mode 100644 index 8593eb2..0000000 --- a/build_lumi.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -./cloudsc-bundle build \ - --arch=arch/eurohpc/lumi/cray-gpu/14.0.2 \ - --build-dir=build/cray-gpu/14.0.2/release/double \ - --build-type=release \ - --clean - -./cloudsc-bundle build \ - --arch=arch/eurohpc/lumi/cray-gpu/14.0.2 \ - --build-dir=build/cray-gpu/14.0.2/release/single \ - --build-type=release \ - --single-precision \ - --clean - -./cloudsc-bundle build \ - --arch=arch/eurohpc/lumi/cray-gpu/14.0.2 \ - --build-dir=build/cray-gpu/14.0.2/bit/double \ - --build-type=bit \ - --clean - -./cloudsc-bundle build \ - --arch=arch/eurohpc/lumi/cray-gpu/14.0.2 \ - --build-dir=build/cray-gpu/14.0.2/bit/single \ - --build-type=bit \ - --single-precision \ - --clean diff --git a/build_mlux.sh b/build_mlux.sh deleted file mode 100644 index e861680..0000000 --- a/build_mlux.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -srun --cpus-per-task=128 ./cloudsc-bundle build \ - --arch=arch/eurohpc/meluxina/nvhpc/22.7 \ - --build-dir=build/nvhpc/22.7/release/double \ - --build-type=release -# --clean - -srun --cpus-per-task=128 ./cloudsc-bundle build \ - --arch=arch/eurohpc/meluxina/nvhpc/22.7 \ - --build-dir=build/nvhpc/22.7/release/single \ - --build-type=release \ - --single-precision -# --clean - -srun --cpus-per-task=128 ./cloudsc-bundle build \ - --arch=arch/eurohpc/meluxina/nvhpc/22.7 \ - --build-dir=build/nvhpc/22.7/bit/double \ - --build-type=bit -# --clean - -srun --cpus-per-task=128 ./cloudsc-bundle build \ - --arch=arch/eurohpc/meluxina/nvhpc/22.7 \ - --build-dir=build/nvhpc/22.7/bit/single \ - --build-type=bit \ - --single-precision -# --clean From 3cb556d97ebeeb7b34e9287cbb8dfa4a93eb38aa Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 10 Jan 2024 18:23:33 +0100 Subject: [PATCH 09/15] Update Loki config files. 
--- src/cloudsc2_ad_loki/cloudsc_loki.config | 67 ++++++++++++------------ src/cloudsc2_nl_loki/cloudsc_loki.config | 60 ++++++++++----------- src/cloudsc2_tl_loki/cloudsc_loki.config | 67 +++++++++++------------- 3 files changed, 95 insertions(+), 99 deletions(-) diff --git a/src/cloudsc2_ad_loki/cloudsc_loki.config b/src/cloudsc2_ad_loki/cloudsc_loki.config index f525adc..0b824a2 100644 --- a/src/cloudsc2_ad_loki/cloudsc_loki.config +++ b/src/cloudsc2_ad_loki/cloudsc_loki.config @@ -14,41 +14,40 @@ disable = ['performance_timer%start', 'performance_timer%end', 'performance_time 'performance_timer%thread_end', 'performance_timer%thread_log', 'performance_timer%thread_log', 'performance_timer%print_performance'] + # Define entry point for call-tree transformation -[[routine]] -name = 'cloudsc_driver_ad' -expand = true -role = 'driver' +[routines] -[[routine]] -name = 'cloudsc2ad' -role = 'kernel' -expand = true +[routines.cloudsc_driver_ad] + role = 'driver' + expand = true -[[routine]] -name = 'cloudsc2tl' -role = 'kernel' -expand = true +[routines.cloudsc2ad] + role = 'kernel' + expand = true -[[routine]] -name = 'satur' -role = 'kernel' -expand = true -############################################## - -[[dimension]] -name = 'horizontal' -size = 'KLON' -index = 'JL' -bounds = ['KIDIA', 'KFDIA'] -aliases = ['NPROMA', 'KDIM%KLON'] - -[[dimension]] -name = 'vertical' -size = 'KLEV' -index = 'JK' - -[[dimension]] -name = 'block_dim' -size = 'NGPBLKS' -index = 'IBL' +[routines.cloudsc2tl] + role = 'kernel' + expand = true + +[routines.satur] + role = 'kernel' + expand = true + + +# Define indices and bounds for array dimensions +[dimensions] + +[dimensions.horizontal] + size = 'KLON' + index = 'JL' + bounds = ['KIDIA', 'KFDIA'] + aliases = ['NPROMA', 'KDIM%KLON'] + +[dimensions.vertical] + size = 'KLEV' + index = 'JK' + +[dimensions.block_dim] + size = 'NGPBLKS' + index = 'IBL' diff --git a/src/cloudsc2_nl_loki/cloudsc_loki.config 
b/src/cloudsc2_nl_loki/cloudsc_loki.config index f85ab06..1d012f2 100644 --- a/src/cloudsc2_nl_loki/cloudsc_loki.config +++ b/src/cloudsc2_nl_loki/cloudsc_loki.config @@ -14,36 +14,36 @@ disable = ['performance_timer%start', 'performance_timer%end', 'performance_time 'performance_timer%thread_end', 'performance_timer%thread_log', 'performance_timer%thread_log', 'performance_timer%print_performance'] + # Define entry point for call-tree transformation -[[routine]] -name = 'cloudsc_driver' -expand = true -role = 'driver' +[routines] -[[routine]] -name = 'cloudsc2' -role = 'kernel' -expand = true +[routines.cloudsc_driver] + role = 'driver' + expand = true -[[routine]] -name = 'satur' -role = 'kernel' -expand = true -############################################## - -[[dimension]] -name = 'horizontal' -size = 'KLON' -index = 'JL' -bounds = ['KIDIA', 'KFDIA'] -aliases = ['NPROMA', 'KDIM%KLON'] - -[[dimension]] -name = 'vertical' -size = 'KLEV' -index = 'JK' - -[[dimension]] -name = 'block_dim' -size = 'NGPBLKS' -index = 'IBL' +[routines.cloudsc2] + role = 'kernel' + expand = true + +[routines.satur] + expand = true + role = 'kernel' + + +# Define indices and bounds for array dimensions +[dimensions] + +[dimensions.horizontal] + size = 'KLON' + index = 'JL' + bounds = ['KIDIA', 'KFDIA'] + aliases = ['NPROMA', 'KDIM%KLON'] + +[dimensions.vertical] + size = 'KLEV' + index = 'JK' + +[dimensions.block_dim] + size = 'NGPBLKS' + index = 'IBL' diff --git a/src/cloudsc2_tl_loki/cloudsc_loki.config b/src/cloudsc2_tl_loki/cloudsc_loki.config index 675d59c..ea14231 100644 --- a/src/cloudsc2_tl_loki/cloudsc_loki.config +++ b/src/cloudsc2_tl_loki/cloudsc_loki.config @@ -15,43 +15,40 @@ disable = ['performance_timer%start', 'performance_timer%end', 'performance_time 'performance_timer%thread_log', 'performance_timer%print_performance', 'validate_taylor_test'] + # Define entry point for call-tree transformation -[[routine]] -name = 'cloudsc_driver_tl' -expand = true -role = 'driver' 
+[routines] -[[routine]] -name = 'cloudsc2' -role = 'kernel' -expand = true +[routines.cloudsc_driver_tl] + role = 'driver' + expand = true -[[routine]] -name = 'cloudsc2tl' -role = 'kernel' -expand = true +[routines.cloudsc2] + role = 'kernel' + expand = true +[routines.cloudsc2tl] + role = 'kernel' + expand = true -[[routine]] -name = 'satur' -role = 'kernel' -expand = true - -############################################## - -[[dimension]] -name = 'horizontal' -size = 'KLON' -index = 'JL' -bounds = ['KIDIA', 'KFDIA'] -aliases = ['NPROMA', 'KDIM%KLON'] - -[[dimension]] -name = 'vertical' -size = 'KLEV' -index = 'JK' - -[[dimension]] -name = 'block_dim' -size = 'NGPBLKS' -index = 'IBL' +[routines.satur] + role = 'kernel' + expand = true + + +# Define indices and bounds for array dimensions +[dimensions] + +[dimensions.horizontal] + size = 'KLON' + index = 'JL' + bounds = ['KIDIA', 'KFDIA'] + aliases = ['NPROMA', 'KDIM%KLON'] + +[dimensions.vertical] + size = 'KLEV' + index = 'JK' + +[dimensions.block_dim] + size = 'NGPBLKS' + index = 'IBL' From 8f7074587979a5a0cda454d6232e0c10b41ab974 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 10 Jan 2024 18:23:54 +0100 Subject: [PATCH 10/15] Add directive openacc. 
--- src/cloudsc2_ad_loki/CMakeLists.txt | 2 ++ src/cloudsc2_tl_loki/CMakeLists.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/cloudsc2_ad_loki/CMakeLists.txt b/src/cloudsc2_ad_loki/CMakeLists.txt index d7a4dde..0019db8 100755 --- a/src/cloudsc2_ad_loki/CMakeLists.txt +++ b/src/cloudsc2_ad_loki/CMakeLists.txt @@ -66,6 +66,7 @@ if( HAVE_CLOUDSC2_AD_LOKI ) loki_transform_convert( MODE scc FRONTEND ${LOKI_FRONTEND} CPP + DIRECTIVE openacc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config PATH ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 @@ -110,6 +111,7 @@ if( HAVE_CLOUDSC2_AD_LOKI ) loki_transform_convert( MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP + DIRECTIVE openacc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config PATH ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 diff --git a/src/cloudsc2_tl_loki/CMakeLists.txt b/src/cloudsc2_tl_loki/CMakeLists.txt index 48abcdd..8a774a3 100755 --- a/src/cloudsc2_tl_loki/CMakeLists.txt +++ b/src/cloudsc2_tl_loki/CMakeLists.txt @@ -68,6 +68,7 @@ if( HAVE_CLOUDSC2_TL_LOKI ) loki_transform_convert( MODE scc FRONTEND ${LOKI_FRONTEND} CPP + DIRECTIVE openacc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config PATH ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 @@ -119,6 +120,7 @@ if( HAVE_CLOUDSC2_TL_LOKI ) loki_transform_convert( MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP + DIRECTIVE openacc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config PATH ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 From 885c2d74a8da1913331d11b47e69c142eaaefde3 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Wed, 10 Jan 2024 18:25:00 +0100 Subject: [PATCH 11/15] Fix VALIDATE_TAYLOR_TEST. 
--- src/cloudsc2_tl_loki/cloudsc_driver_tl_loki_mod.F90 | 2 +- src/cloudsc2_tl_loki/error.F90 | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/cloudsc2_tl_loki/cloudsc_driver_tl_loki_mod.F90 b/src/cloudsc2_tl_loki/cloudsc_driver_tl_loki_mod.F90 index 2d6be3a..263bfca 100644 --- a/src/cloudsc2_tl_loki/cloudsc_driver_tl_loki_mod.F90 +++ b/src/cloudsc2_tl_loki/cloudsc_driver_tl_loki_mod.F90 @@ -259,7 +259,7 @@ SUBROUTINE CLOUDSC_DRIVER_TL( & CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, ZHPM, NGPTOT) - CALL VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, & + CALL VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, NGPBLKS, & & BUFFER_LOC(:,:,1,:) , ZTENO_T5(:,:,:,:), ZTENO_T(:,:,:), & & BUFFER_LOC(:,:,3,:) , ZTENO_Q5(:,:,:,:), ZTENO_Q(:,:,:), & & BUFFER_LOC(:,:,3+NCLDQL,:), ZTENO_L5(:,:,:,:), ZTENO_L(:,:,:), & diff --git a/src/cloudsc2_tl_loki/error.F90 b/src/cloudsc2_tl_loki/error.F90 index 8557338..b3a83e6 100644 --- a/src/cloudsc2_tl_loki/error.F90 +++ b/src/cloudsc2_tl_loki/error.F90 @@ -17,7 +17,7 @@ SUBROUTINE ERROR_NORM(NLON, FIELD, PERT5, PERT, ZNORM, ZCOUNT, ZLAMBDA) END SUBROUTINE ERROR_NORM - SUBROUTINE VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, & + SUBROUTINE VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, NGPBLKS, & & FIELD_T, PERT5_T, PERT_T, & & FIELD_Q, PERT5_Q, PERT_Q, & & FIELD_L, PERT5_L, PERT_L, & @@ -29,7 +29,7 @@ SUBROUTINE VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, & & PFHPSN, PERT5_FHPSN, PERT_FHPSN, & & PCOVPTOT, PERT5_COVPTOT, PERT_COVPTOT & & ) - INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA, NLEV, NLAM, NGPTOT + INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA, NLEV, NLAM, NGPTOT, NGPBLKS REAL(KIND=JPRB), INTENT(IN) :: FIELD_T(NPROMA,NLEV,NGPBLKS), PERT5_T(NPROMA,NLEV,NLAM,NGPBLKS), PERT_T(NPROMA,NLEV,NGPBLKS) REAL(KIND=JPRB), INTENT(IN) :: FIELD_Q(NPROMA,NLEV,NGPBLKS), PERT5_Q(NPROMA,NLEV,NLAM,NGPBLKS), PERT_Q(NPROMA,NLEV,NGPBLKS) REAL(KIND=JPRB), INTENT(IN) :: FIELD_L(NPROMA,NLEV,NGPBLKS), 
PERT5_L(NPROMA,NLEV,NLAM,NGPBLKS), PERT_L(NPROMA,NLEV,NGPBLKS) @@ -41,11 +41,10 @@ SUBROUTINE VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, & REAL(KIND=JPRB), INTENT(IN) :: PFHPSN(NPROMA,NLEV+1,NGPBLKS), PERT5_FHPSN(NPROMA,NLEV+1,NLAM,NGPBLKS), PERT_FHPSN(NPROMA,NLEV+1,NGPBLKS) REAL(KIND=JPRB), INTENT(IN) :: PCOVPTOT(NPROMA,NLEV,NGPBLKS), PERT5_COVPTOT(NPROMA,NLEV,NLAM,NGPBLKS), PERT_COVPTOT(NPROMA,NLEV,NGPBLKS) - INTEGER(KIND=JPIM) :: JKGLO, IBL, ICEND, ILAM, NGPBLKS + INTEGER(KIND=JPIM) :: JKGLO, IBL, ICEND, ILAM INTEGER(KIND=JPIM) :: ISTART,ITEST,INEGAT,ITEMPNEGAT REAL(KIND=JPRB) :: ZLAMBDA, ZCOUNT, ZNORM, ZNORMG(10) - NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) DO JKGLO=1,NGPTOT,NPROMA IBL = (JKGLO-1)/NPROMA+1 ICEND = MIN(NPROMA,NGPTOT-JKGLO+1) From 25064beadd1bb66f059c91c3cb27b630056b46a1 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Thu, 11 Jan 2024 00:42:31 +0100 Subject: [PATCH 12/15] Use loki_transform() in CMake files. --- src/cloudsc2_ad_loki/CMakeLists.txt | 125 ++++++++------- src/cloudsc2_nl_loki/CMakeLists.txt | 47 +++--- src/cloudsc2_tl_loki/CMakeLists.txt | 228 ++++++++++++++-------------- 3 files changed, 216 insertions(+), 184 deletions(-) diff --git a/src/cloudsc2_ad_loki/CMakeLists.txt b/src/cloudsc2_ad_loki/CMakeLists.txt index 0019db8..6dc2ee4 100755 --- a/src/cloudsc2_ad_loki/CMakeLists.txt +++ b/src/cloudsc2_ad_loki/CMakeLists.txt @@ -21,20 +21,21 @@ if( HAVE_CLOUDSC2_AD_LOKI ) ## * Internal "do-nothing" mode for Loki debug ## #################################################### - - loki_transform_convert( - MODE idem FRONTEND ${LOKI_FRONTEND} + loki_transform( + COMMAND convert + MODE idem + FRONTEND ${LOKI_FRONTEND} CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 INCLUDES ${COMMON_INCLUDE} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-idem OUTPUT - 
loki-idem/satur.idem.F90 - loki-idem/cloudsc2ad.idem.F90 - loki-idem/cloudsc2tl.idem.F90 - loki-idem/cloudsc_driver_ad_loki_mod.idem.F90 - DEPENDS satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 + loki-idem/satur.idem.F90 + loki-idem/cloudsc2ad.idem.F90 + loki-idem/cloudsc2tl.idem.F90 + loki-idem/cloudsc_driver_ad_loki_mod.idem.F90 + DEPENDS satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 ) ecbuild_add_executable( TARGET dwarf-cloudsc2-ad-loki-idem @@ -57,51 +58,58 @@ if( HAVE_CLOUDSC2_AD_LOKI ) OMP 1 ) - #################################################### - ## "Single Column Coalesced" (SCC) mode ## - ## * Removes horizontal vector loops ## - ## * Invokes compute kernel as `!$acc vector` ## - #################################################### + #################################################### + ## "Single Column Coalesced" (SCC) mode ## + ## * Removes horizontal vector loops ## + ## * Invokes compute kernel as `!$acc vector` ## + #################################################### - loki_transform_convert( - MODE scc FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + MODE scc + FRONTEND ${LOKI_FRONTEND} DIRECTIVE openacc - CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc - OUTPUT - loki-scc/satur.scc.F90 - loki-scc/cloudsc2ad.scc.F90 - loki-scc/cloudsc2tl.scc.F90 - loki-scc/cloudsc_driver_ad_loki_mod.scc.F90 - DEPENDS - satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 - ) - ecbuild_add_executable( TARGET dwarf-cloudsc2-ad-loki-scc - SOURCES - dwarf_cloudsc.F90 - loki-scc/cloudsc_driver_ad_loki_mod.scc.F90 - loki-scc/cloudsc2tl.scc.F90 - loki-scc/cloudsc2ad.scc.F90 - loki-scc/satur.scc.F90 - LIBS - cloudsc2-common-lib - 
DEFINITIONS ${CLOUDSC_DEFINITIONS} - ) + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 + INCLUDES ${COMMON_INCLUDE} + DEFINITIONS CLOUDSC_GPU_TIMING + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc + OUTPUT + loki-scc/satur.scc.F90 + loki-scc/cloudsc2ad.scc.F90 + loki-scc/cloudsc2tl.scc.F90 + loki-scc/cloudsc_driver_ad_loki_mod.scc.F90 + DEPENDS + satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP + ) + + ecbuild_add_executable( TARGET dwarf-cloudsc2-ad-loki-scc + SOURCES + dwarf_cloudsc.F90 + loki-scc/cloudsc_driver_ad_loki_mod.scc.F90 + loki-scc/cloudsc2tl.scc.F90 + loki-scc/cloudsc2ad.scc.F90 + loki-scc/satur.scc.F90 + LIBS + cloudsc2-common-lib + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc2-ad--loki-scc-serial + COMMAND bin/dwarf-cloudsc2-ad-loki-scc + ARGS + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G + ) + - ecbuild_add_test( - TARGET dwarf-cloudsc2-ad--loki-scc-serial - COMMAND bin/dwarf-cloudsc2-ad-loki-scc - ARGS - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. 
- OMP 1 - ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G - ) #################################################### ## SCC-hoist mode ## ## * SCC with vector loop hoisted ## @@ -109,16 +117,17 @@ if( HAVE_CLOUDSC2_AD_LOKI ) ## * Temporary arrays hoisted to driver ## #################################################### - loki_transform_convert( - MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + MODE scc-hoist + FRONTEND ${LOKI_FRONTEND} DIRECTIVE openacc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 INCLUDES ${COMMON_INCLUDE} DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist OUTPUT loki-scc-hoist/satur.scc_hoist.F90 loki-scc-hoist/cloudsc2ad.scc_hoist.F90 @@ -126,7 +135,11 @@ if( HAVE_CLOUDSC2_AD_LOKI ) loki-scc-hoist/cloudsc_driver_ad_loki_mod.scc_hoist.F90 DEPENDS satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP ) + ecbuild_add_executable( TARGET dwarf-cloudsc2-ad-loki-scc-hoist SOURCES dwarf_cloudsc.F90 diff --git a/src/cloudsc2_nl_loki/CMakeLists.txt b/src/cloudsc2_nl_loki/CMakeLists.txt index 3e8ef97..5aff52b 100755 --- a/src/cloudsc2_nl_loki/CMakeLists.txt +++ b/src/cloudsc2_nl_loki/CMakeLists.txt @@ -15,19 +15,22 @@ if( HAVE_CLOUDSC2_NL_LOKI ) set( COMMON_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/../common/include" ) set( LOKI_FRONTEND "fp" CACHE STRING "Frontend parser for Loki transforms" ) + + #################################################### ## Idempotence mode: ## ## * Internal "do-nothing" mode for Loki debug ## #################################################### - - loki_transform_convert( - MODE idem FRONTEND ${LOKI_FRONTEND} + loki_transform( + COMMAND convert + MODE idem + FRONTEND ${LOKI_FRONTEND} CONFIG 
${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 INCLUDES ${COMMON_INCLUDE} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-idem OUTPUT loki-idem/satur.idem.F90 loki-idem/cloudsc2.idem.F90 loki-idem/cloudsc_driver_loki_mod.idem.F90 DEPENDS satur.F90 cloudsc2.F90 cloudsc_driver_loki_mod.F90 ) @@ -58,19 +61,22 @@ if( HAVE_CLOUDSC2_NL_LOKI ) ## * Invokes compute kernel as `!$acc vector` ## #################################################### - - loki_transform_convert( - MODE scc FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + MODE scc + FRONTEND ${LOKI_FRONTEND} DIRECTIVE openacc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 INCLUDES ${COMMON_INCLUDE} DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc OUTPUT loki-scc/satur.scc.F90 loki-scc/cloudsc2.scc.F90 loki-scc/cloudsc_driver_loki_mod.scc.F90 DEPENDS satur.F90 cloudsc2.F90 cloudsc_driver_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP ) ecbuild_add_executable( TARGET dwarf-cloudsc2-nl-loki-scc @@ -93,6 +99,7 @@ if( HAVE_CLOUDSC2_NL_LOKI ) ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G ) + #################################################### ## SCC-hoist mode ## ## * SCC with vector loop hoisted ## @@ -100,22 +107,25 @@ if( HAVE_CLOUDSC2_NL_LOKI ) ## * Temporary arrays hoisted to driver ## #################################################### - - loki_transform_convert( - MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + MODE scc-hoist + FRONTEND ${LOKI_FRONTEND} DIRECTIVE openacc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + 
SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 INCLUDES ${COMMON_INCLUDE} DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist OUTPUT loki-scc-hoist/satur.scc_hoist.F90 loki-scc-hoist/cloudsc2.scc_hoist.F90 loki-scc-hoist/cloudsc_driver_loki_mod.scc_hoist.F90 - DEPENDS satur.F90 cloudsc2.F90 cloudsc_driver_loki_mod.F90 + DEPENDS satur.F90 cloudsc2.F90 cloudsc_driver_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP ) ecbuild_add_executable( TARGET dwarf-cloudsc2-nl-loki-scc-hoist @@ -137,4 +147,5 @@ if( HAVE_CLOUDSC2_NL_LOKI ) OMP 1 ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=19G ) + endif() diff --git a/src/cloudsc2_tl_loki/CMakeLists.txt b/src/cloudsc2_tl_loki/CMakeLists.txt index 8a774a3..89965d6 100755 --- a/src/cloudsc2_tl_loki/CMakeLists.txt +++ b/src/cloudsc2_tl_loki/CMakeLists.txt @@ -15,26 +15,28 @@ if( HAVE_CLOUDSC2_TL_LOKI ) execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 ) + #################################################### ## Idempotence mode: ## ## * Internal "do-nothing" mode for Loki debug ## #################################################### - - loki_transform_convert( - MODE idem FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + MODE idem + FRONTEND ${LOKI_FRONTEND} CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 INCLUDES ${COMMON_INCLUDE} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem - OUTPUT - loki-idem/satur.idem.F90 - loki-idem/cloudsc2.idem.F90 - loki-idem/cloudsc2tl.idem.F90 - loki-idem/cloudsc_driver_tl_loki_mod.idem.F90 - DEPENDS - satur.F90 cloudsc2.F90 cloudsc2tl.F90 cloudsc_driver_tl_loki_mod.F90 + BUILDDIR 
${CMAKE_CURRENT_BINARY_DIR}/loki-idem + OUTPUT + loki-idem/satur.idem.F90 + loki-idem/cloudsc2.idem.F90 + loki-idem/cloudsc2tl.idem.F90 + loki-idem/cloudsc_driver_tl_loki_mod.idem.F90 + DEPENDS satur.F90 cloudsc2.F90 cloudsc2tl.F90 cloudsc_driver_tl_loki_mod.F90 + CPP ) ecbuild_add_executable( TARGET dwarf-cloudsc2-tl-loki-idem @@ -59,105 +61,111 @@ if( HAVE_CLOUDSC2_TL_LOKI ) ) - #################################################### - ## "Single Column Coalesced" (SCC) mode ## - ## * Removes horizontal vector loops ## - ## * Invokes compute kernel as `!$acc vector` ## - #################################################### - - - loki_transform_convert( - MODE scc FRONTEND ${LOKI_FRONTEND} CPP + #################################################### + ## "Single Column Coalesced" (SCC) mode ## + ## * Removes horizontal vector loops ## + ## * Invokes compute kernel as `!$acc vector` ## + #################################################### + + loki_transform( + COMMAND convert + MODE scc + FRONTEND ${LOKI_FRONTEND} DIRECTIVE openacc - CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc - OUTPUT - loki-scc/satur.scc.F90 - loki-scc/cloudsc2.scc.F90 - loki-scc/cloudsc2tl.scc.F90 - loki-scc/cloudsc_driver_tl_loki_mod.scc.F90 - DEPENDS - satur.F90 - cloudsc2.F90 - cloudsc2tl.F90 - cloudsc_driver_tl_loki_mod.F90 - ) - - ecbuild_add_executable( TARGET dwarf-cloudsc2-tl-loki-scc - SOURCES - dwarf_cloudsc.F90 - loki-scc/cloudsc_driver_tl_loki_mod.scc.F90 - loki-scc/cloudsc2tl.scc.F90 - loki-scc/cloudsc2.scc.F90 - loki-scc/satur.scc.F90 - error.F90 - LIBS - cloudsc2-common-lib - DEFINITIONS ${CLOUDSC_DEFINITIONS} - ) - - ecbuild_add_test( - TARGET dwarf-cloudsc2-tl--loki-scc-serial - COMMAND bin/dwarf-cloudsc2-tl-loki-scc - ARGS - 
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. - OMP 1 - ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G - ) - - #################################################### - ## SCC-hoist mode ## - ## * SCC with vector loop hoisted ## - ## * Kernel is "seq, but args are full blocks ## - ## * Temporary arrays hoisted to driver ## - #################################################### - - - loki_transform_convert( - MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP - DIRECTIVE openacc - CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist - OUTPUT - loki-scc-hoist/satur.scc_hoist.F90 - loki-scc-hoist/cloudsc2.scc_hoist.F90 - loki-scc-hoist/cloudsc2tl.scc_hoist.F90 - loki-scc-hoist/cloudsc_driver_tl_loki_mod.scc_hoist.F90 - DEPENDS - satur.F90 cloudsc2.F90 cloudsc2tl.F90 cloudsc_driver_tl_loki_mod.F90 - ) - - - ecbuild_add_executable( TARGET dwarf-cloudsc2-tl-loki-scc-hoist - SOURCES - dwarf_cloudsc.F90 - loki-scc-hoist/cloudsc_driver_tl_loki_mod.scc_hoist.F90 - loki-scc-hoist/cloudsc2tl.scc_hoist.F90 - loki-scc-hoist/cloudsc2.scc_hoist.F90 - loki-scc-hoist/satur.scc_hoist.F90 - error.F90 - LIBS - cloudsc2-common-lib - DEFINITIONS ${CLOUDSC_DEFINITIONS} - ) + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 + INCLUDES ${COMMON_INCLUDE} + DEFINITIONS CLOUDSC_GPU_TIMING + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc + OUTPUT + loki-scc/satur.scc.F90 + loki-scc/cloudsc2.scc.F90 + loki-scc/cloudsc2tl.scc.F90 + loki-scc/cloudsc_driver_tl_loki_mod.scc.F90 + DEPENDS + satur.F90 + cloudsc2.F90 + cloudsc2tl.F90 + cloudsc_driver_tl_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP + ) + + ecbuild_add_executable( TARGET 
dwarf-cloudsc2-tl-loki-scc + SOURCES + dwarf_cloudsc.F90 + loki-scc/cloudsc_driver_tl_loki_mod.scc.F90 + loki-scc/cloudsc2tl.scc.F90 + loki-scc/cloudsc2.scc.F90 + loki-scc/satur.scc.F90 + error.F90 + LIBS + cloudsc2-common-lib + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc2-tl--loki-scc-serial + COMMAND bin/dwarf-cloudsc2-tl-loki-scc + ARGS + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G + ) + + + #################################################### + ## SCC-hoist mode ## + ## * SCC with vector loop hoisted ## + ## * Kernel is "seq, but args are full blocks ## + ## * Temporary arrays hoisted to driver ## + #################################################### + + loki_transform( + COMMAND convert + MODE scc-hoist + FRONTEND ${LOKI_FRONTEND} + DIRECTIVE openacc + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 + INCLUDES ${COMMON_INCLUDE} + DEFINITIONS CLOUDSC_GPU_TIMING + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist + OUTPUT + loki-scc-hoist/satur.scc_hoist.F90 + loki-scc-hoist/cloudsc2.scc_hoist.F90 + loki-scc-hoist/cloudsc2tl.scc_hoist.F90 + loki-scc-hoist/cloudsc_driver_tl_loki_mod.scc_hoist.F90 + DEPENDS + satur.F90 cloudsc2.F90 cloudsc2tl.F90 cloudsc_driver_tl_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP + ) + + ecbuild_add_executable( TARGET dwarf-cloudsc2-tl-loki-scc-hoist + SOURCES + dwarf_cloudsc.F90 + loki-scc-hoist/cloudsc_driver_tl_loki_mod.scc_hoist.F90 + loki-scc-hoist/cloudsc2tl.scc_hoist.F90 + loki-scc-hoist/cloudsc2.scc_hoist.F90 + loki-scc-hoist/satur.scc_hoist.F90 + error.F90 + LIBS + cloudsc2-common-lib + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) - ecbuild_add_test( - TARGET dwarf-cloudsc2-tl-loki-scc-hoist-serial - COMMAND bin/dwarf-cloudsc2-tl-loki-scc-hoist - ARGS - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. 
- OMP 1 - ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=19G - ) + ecbuild_add_test( + TARGET dwarf-cloudsc2-tl-loki-scc-hoist-serial + COMMAND bin/dwarf-cloudsc2-tl-loki-scc-hoist + ARGS + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=19G + ) endif() From ba9a9238695986901e47e29fa29293713706fb36 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Thu, 11 Jan 2024 09:50:27 +0100 Subject: [PATCH 13/15] Remove arch files which have not been tested. --- arch/eurohpc/leonardo/nvhpc/23.1/env.sh | 49 ---------------- .../leonardo/nvhpc/23.1/toolchain.cmake | 57 ------------------- arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh | 44 -------------- .../lumi/amd-gpu/8.3.3/toolchain.cmake | 31 ---------- arch/eurohpc/lumi/amd-host/8.3.3/env.sh | 43 -------------- .../lumi/amd-host/8.3.3/toolchain.cmake | 25 -------- arch/eurohpc/lumi/cray-host/14.0.2/env.sh | 42 -------------- .../lumi/cray-host/14.0.2/toolchain.cmake | 35 ------------ 8 files changed, 326 deletions(-) delete mode 100644 arch/eurohpc/leonardo/nvhpc/23.1/env.sh delete mode 100644 arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake delete mode 100644 arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh delete mode 100644 arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake delete mode 100644 arch/eurohpc/lumi/amd-host/8.3.3/env.sh delete mode 100644 arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake delete mode 100644 arch/eurohpc/lumi/cray-host/14.0.2/env.sh delete mode 100644 arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake diff --git a/arch/eurohpc/leonardo/nvhpc/23.1/env.sh b/arch/eurohpc/leonardo/nvhpc/23.1/env.sh deleted file mode 100644 index f1333db..0000000 --- a/arch/eurohpc/leonardo/nvhpc/23.1/env.sh +++ /dev/null @@ -1,49 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
-# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -# Source me to get the correct configure/build/run environment - -# NB: This does currently not support the Serialbox-based build modes -# because the available Boost module does not include the boost_filesystem library - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - echo "+ module load $1" - module load $1 -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Load modules -module_load nvhpc/23.1 -module_load openmpi/4.1.4--nvhpc--23.1-cuda-11.8 -module_load cmake/3.24.3 -module_load cuda/11.8 -module_load hdf5/1.12.2--openmpi--4.1.4--nvhpc--23.1 -module_load python/3.10.8--gcc--8.5.0 - -export CC=nvc -export CXX=nvc++ -export F77=nvfortran -export FC=nvfortran -export F90=nvfortran - -# Increase stack size to maximum -ulimit -S -s unlimited - -set -x - -# Restore tracing to stored setting -{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null - -# Variable no longer required, make sure it is not set -unset ECBUILD_TOOLCHAIN diff --git a/arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake b/arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake deleted file mode 100644 index ce8de9d..0000000 --- a/arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake +++ /dev/null @@ -1,57 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
- -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) - -#################################################################### -# OpenMP FLAGS -#################################################################### - -# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module -# unless its stored as a cache variable -set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" ) - -# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to -# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone -# doesn't have any effect here as the module uses FORCE to overwrite the -# existing value) -set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) -set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") - -#################################################################### -# OpenAcc FLAGS -#################################################################### - -# NB: We have to add `-mp` again to avoid undefined symbols during linking -# (smells like an Nvidia bug) -set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" ) -# Enable this to get more detailed compiler output -# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) - -#################################################################### -# COMMON FLAGS -#################################################################### - -set(ECBUILD_Fortran_FLAGS "-fpic") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") 
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") - -set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) - -set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) - -set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) diff --git a/arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh b/arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh deleted file mode 100644 index 67eee0b..0000000 --- a/arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh +++ /dev/null @@ -1,44 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -# Source me to get the correct configure/build/run environment - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - echo "+ module load $1" - module load $1 -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Unload to be certain -module reset - -# Load modules -module_load LUMI/22.08 -module_load partition/G -module_load PrgEnv-aocc/8.3.3 -module_load craype-accel-amd-gfx90a -module_load buildtools/22.08 -module_load cray-hdf5/1.12.1.5 -module_load cray-python/3.9.12.1 - -# Specify compilers -export CC=amdclang CXX=amdclang++ FC=amdflang -#export CC=cc CXX=CC FC=ftn - -set -x - -# Restore tracing to stored setting -{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null - -export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake b/arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake deleted file mode 100644 index 557774a..0000000 --- a/arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake +++ /dev/null @@ -1,31 +0,0 @@ -# (C) Copyright 1988- ECMWF. 
-# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) - -#################################################################### -# OpenMP FLAGS -#################################################################### - -set( OpenMP_Fortran_FLAGS "-fopenmp --offload-arch=gfx90a" CACHE STRING "" ) - -#################################################################### -# OpenAcc FLAGS -#################################################################### - -set( ENABLE_ACC OFF CACHE STRING "" ) - -#################################################################### -# COMMON FLAGS -#################################################################### - -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fpic -O3") diff --git a/arch/eurohpc/lumi/amd-host/8.3.3/env.sh b/arch/eurohpc/lumi/amd-host/8.3.3/env.sh deleted file mode 100644 index 6c3ba71..0000000 --- a/arch/eurohpc/lumi/amd-host/8.3.3/env.sh +++ /dev/null @@ -1,43 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
- -# Source me to get the correct configure/build/run environment - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - echo "+ module load $1" - module load $1 -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Unload to be certain -module reset - -# Load modules -module_load LUMI/22.08 -module_load partition/C -module_load PrgEnv-aocc/8.3.3 -module_load craype-accel-host -module_load buildtools/22.08 -module_load cray-hdf5/1.12.1.5 -module_load cray-python/3.9.12.1 - -# Specify compilers -export CC=cc CXX=CC FC=ftn - -set -x - -# Restore tracing to stored setting -{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null - -export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake b/arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake deleted file mode 100644 index dac9ed4..0000000 --- a/arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake +++ /dev/null @@ -1,25 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
- -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) - -#################################################################### -# OpenAcc FLAGS -#################################################################### - -set( ENABLE_ACC OFF CACHE STRING "" ) - -#################################################################### -# COMMON FLAGS -#################################################################### - -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fpic -O3") diff --git a/arch/eurohpc/lumi/cray-host/14.0.2/env.sh b/arch/eurohpc/lumi/cray-host/14.0.2/env.sh deleted file mode 100644 index afe2ec1..0000000 --- a/arch/eurohpc/lumi/cray-host/14.0.2/env.sh +++ /dev/null @@ -1,42 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
- -# Source me to get the correct configure/build/run environment - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - echo "+ module load $1" - module load $1 -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Unload to be certain -module reset - -# Load modules -module_load PrgEnv-cray/8.3.3 -module_load LUMI/22.08 -# module_load craype-x86-milan -module_load craype-accel-host -module_load buildtools/22.08 -module_load cray-hdf5/1.12.1.5 -module_load cray-python/3.9.12.1 - -module list - -set -x - -# Restore tracing to stored setting -{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null - -export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake b/arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake deleted file mode 100644 index 638b81f..0000000 --- a/arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake +++ /dev/null @@ -1,35 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
- -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) -set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) - -#################################################################### -# OpenACC FLAGS -#################################################################### - -set( ENABLE_ACC OFF CACHE STRING "" ) -set( OpenACC_C_FLAGS "-hnoacc" ) -set( OpenACC_CXX_FLAGS "-hnoacc" ) -set( OpenACC_Fortran_FLAGS "-hnoacc" ) - -#################################################################### -# Compiler FLAGS -#################################################################### - -# General Flags (add to default) -set(ECBUILD_Fortran_FLAGS "-hcontiguous") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed") - -set(ECBUILD_Fortran_FLAGS_BIT "-emf -N 1023 -O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") From 1cbfac90fedadca7462e391d770e1e04254cd3a1 Mon Sep 17 00:00:00 2001 From: stubbiali Date: Thu, 11 Jan 2024 10:11:16 +0100 Subject: [PATCH 14/15] Remove arch files which have not been tested. 
--- arch/eurohpc/meluxina/nvhpc/21.11/env.sh | 58 ------------------- .../meluxina/nvhpc/21.11/toolchain.cmake | 57 ------------------ arch/eurohpc/meluxina/nvhpc/22.3/env.sh | 56 ------------------ .../meluxina/nvhpc/22.3/toolchain.cmake | 57 ------------------ 4 files changed, 228 deletions(-) delete mode 100644 arch/eurohpc/meluxina/nvhpc/21.11/env.sh delete mode 100644 arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake delete mode 100644 arch/eurohpc/meluxina/nvhpc/22.3/env.sh delete mode 100644 arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake diff --git a/arch/eurohpc/meluxina/nvhpc/21.11/env.sh b/arch/eurohpc/meluxina/nvhpc/21.11/env.sh deleted file mode 100644 index d0253d8..0000000 --- a/arch/eurohpc/meluxina/nvhpc/21.11/env.sh +++ /dev/null @@ -1,58 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
- -# Source me to get the correct configure/build/run environment - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - echo "+ module load $1" - module load $1 -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Unload all modules to be certain -module_unload ParaStationMPI -module_unload NVHPC -module_unload gompi -module_unload HDF5 -module_unload CMake - -# Load modules -module use /apps/USE/easybuild/staging/2022.1/modules/all - -module_load NVHPC/21.11 -module_load ParaStationMPI/5.4.11-1-GCC-10.3.0-CUDA-11.3.1 -module_load CMake/3.23.1 -module_load Boost/1.79.0-GCC-11.3.0 -module_load Python/3.10.4-GCCcore-11.3.0 - -export CC=nvc -export CXX=nvc++ -export F77=nvfortran -export FC=nvfortran -export F90=nvfortran - -export HDF5_ROOT=/mnt/tier2/project/p200061/nvhpc-install - -# Loki install workaround for new editable installs -export SETUPTOOLS_ENABLE_FEATURES="legacy-editable" - -# Increase stack size to maximum -ulimit -S -s unlimited - -set -x - -# Restore tracing to stored setting -{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null - -export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake b/arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake deleted file mode 100644 index ce8de9d..0000000 --- a/arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake +++ /dev/null @@ -1,57 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
- -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) - -#################################################################### -# OpenMP FLAGS -#################################################################### - -# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module -# unless its stored as a cache variable -set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" ) - -# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to -# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone -# doesn't have any effect here as the module uses FORCE to overwrite the -# existing value) -set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) -set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") - -#################################################################### -# OpenAcc FLAGS -#################################################################### - -# NB: We have to add `-mp` again to avoid undefined symbols during linking -# (smells like an Nvidia bug) -set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" ) -# Enable this to get more detailed compiler output -# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) - -#################################################################### -# COMMON FLAGS -#################################################################### - -set(ECBUILD_Fortran_FLAGS "-fpic") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") 
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") - -set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) - -set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) - -set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) diff --git a/arch/eurohpc/meluxina/nvhpc/22.3/env.sh b/arch/eurohpc/meluxina/nvhpc/22.3/env.sh deleted file mode 100644 index 7a23ace..0000000 --- a/arch/eurohpc/meluxina/nvhpc/22.3/env.sh +++ /dev/null @@ -1,56 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -# Source me to get the correct configure/build/run environment - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - echo "+ module load $1" - module load $1 -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Unload all modules to be certain -module_unload ParaStationMPI -module_unload NVHPC -module_unload gompi -module_unload HDF5 -module_unload CMake - -# Load modules -module use /apps/USE/easybuild/staging/2021.5/modules/all - -module_load NVHPC/22.3 -module_load ParaStationMPI/5.4.11-1-GCC-10.3.0-CUDA-11.3.1 -module_load CMake/3.20.4 -module_load CUDA/11.3.1 -module_load Boost/1.76.0-GCC-10.3.0 -module_load Python/3.9.5-GCCcore-10.3.0 - -export CC=nvc -export CXX=nvc++ -export F77=nvfortran -export FC=nvfortran -export F90=nvfortran - -export HDF5_ROOT=/mnt/tier2/project/p200061/nvhpc-install - -# Increase stack size to maximum -ulimit -S -s unlimited - -set -x - -# Restore tracing to stored setting -{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null - -export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git 
a/arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake b/arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake deleted file mode 100644 index ce8de9d..0000000 --- a/arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake +++ /dev/null @@ -1,57 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) - -#################################################################### -# OpenMP FLAGS -#################################################################### - -# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module -# unless its stored as a cache variable -set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" ) - -# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to -# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone -# doesn't have any effect here as the module uses FORCE to overwrite the -# existing value) -set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) -set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") - -#################################################################### -# OpenAcc FLAGS -#################################################################### - -# NB: We have to add `-mp` again to avoid undefined symbols during linking -# (smells like an Nvidia bug) -set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" ) -# Enable this to get more detailed compiler output -# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} 
-Minfo" ) - -#################################################################### -# COMMON FLAGS -#################################################################### - -set(ECBUILD_Fortran_FLAGS "-fpic") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") - -set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) - -set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) - -set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) From 572095b35ba2f1a0f06421bf541af434803675df Mon Sep 17 00:00:00 2001 From: stubbiali Date: Thu, 11 Jan 2024 10:16:44 +0100 Subject: [PATCH 15/15] Uncomment code. --- src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 | 104 +++++++++--------- src/cloudsc2_nl/dwarf_cloudsc.F90 | 2 +- src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 | 122 +++++++++++----------- 3 files changed, 114 insertions(+), 114 deletions(-) diff --git a/src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 b/src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 index 10d9f39..38d3bdd 100644 --- a/src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 +++ b/src/cloudsc2_ad/cloudsc_driver_ad_mod.F90 @@ -194,19 +194,19 @@ SUBROUTINE CLOUDSC_DRIVER_AD( & & ZFHPSL , ZFHPSN , ZCOVPTOT, & & YDCST, YDTHF, YHNC, YPHLI, YCLD, YCLDP, YNCL ) ! o -! ! First norm -! DO JROF=1,ICEND -! ZNORM1(JROF)=SUM(ZTENO_T(JROF,1:NLEV)*ZTENO_T(JROF,1:NLEV)) & -! & + SUM(ZTENO_Q(JROF,1:NLEV)*ZTENO_Q(JROF,1:NLEV)) & -! & + SUM(ZTENO_L(JROF,1:NLEV)*ZTENO_L(JROF,1:NLEV)) & -! & + SUM(ZTENO_I(JROF,1:NLEV)*ZTENO_I(JROF,1:NLEV)) & -! & + SUM(ZCLC(JROF,1:NLEV)*ZCLC(JROF,1:NLEV)) & -! & + SUM(ZFPLSL(JROF,1:NLEV+1)*ZFPLSL(JROF,1:NLEV+1)) & -! & + SUM(ZFPLSN(JROF,1:NLEV+1)*ZFPLSN(JROF,1:NLEV+1)) & -! 
& + SUM(ZFHPSL(JROF,1:NLEV+1)*ZFHPSL(JROF,1:NLEV+1)) & -! & + SUM(ZFHPSN(JROF,1:NLEV+1)*ZFHPSN(JROF,1:NLEV+1)) & -! & + SUM(ZCOVPTOT(JROF,1:NLEV)*ZCOVPTOT(JROF,1:NLEV)) -! ENDDO + ! First norm + DO JROF=1,ICEND + ZNORM1(JROF)=SUM(ZTENO_T(JROF,1:NLEV)*ZTENO_T(JROF,1:NLEV)) & + & + SUM(ZTENO_Q(JROF,1:NLEV)*ZTENO_Q(JROF,1:NLEV)) & + & + SUM(ZTENO_L(JROF,1:NLEV)*ZTENO_L(JROF,1:NLEV)) & + & + SUM(ZTENO_I(JROF,1:NLEV)*ZTENO_I(JROF,1:NLEV)) & + & + SUM(ZCLC(JROF,1:NLEV)*ZCLC(JROF,1:NLEV)) & + & + SUM(ZFPLSL(JROF,1:NLEV+1)*ZFPLSL(JROF,1:NLEV+1)) & + & + SUM(ZFPLSN(JROF,1:NLEV+1)*ZFPLSN(JROF,1:NLEV+1)) & + & + SUM(ZFHPSL(JROF,1:NLEV+1)*ZFHPSL(JROF,1:NLEV+1)) & + & + SUM(ZFHPSN(JROF,1:NLEV+1)*ZFHPSN(JROF,1:NLEV+1)) & + & + SUM(ZCOVPTOT(JROF,1:NLEV)*ZCOVPTOT(JROF,1:NLEV)) + ENDDO ! Initiaslization of output variables ZAPH = 0.0_JPRB @@ -251,35 +251,35 @@ SUBROUTINE CLOUDSC_DRIVER_AD( & & ZFHPSL , ZFHPSN , ZCOVPTOT, & & YDCST, YDTHF, YHNC, YPHLI, YCLD, YCLDP, YNCL) ! o -! ! Second norm -! DO JROF=1,ICEND -! ZNORM2(JROF)=SUM(ZAPH0(JROF,1:NLEV+1)*ZAPH(JROF,1:NLEV+1)) & -! & + SUM(ZAP0(JROF,1:NLEV)*ZAP(JROF,1:NLEV)) & -! & + SUM(ZQ0(JROF,1:NLEV)*ZQ(JROF,1:NLEV)) & -! & + SUM(ZZQSAT0(JROF,1:NLEV)*ZZQSAT(JROF,1:NLEV)) & -! & + SUM(ZT0(JROF,1:NLEV)*ZT(JROF,1:NLEV)) & -! & + SUM(ZL0(JROF,1:NLEV)*ZL(JROF,1:NLEV)) & -! & + SUM(ZI0(JROF,1:NLEV)*ZI(JROF,1:NLEV)) & -! & + SUM(ZLUDE0(JROF,1:NLEV)*ZLUDE(JROF,1:NLEV)) & -! & + SUM(ZLU0(JROF,1:NLEV)*ZLU(JROF,1:NLEV)) & -! & + SUM(ZMFU0(JROF,1:NLEV)*ZMFU(JROF,1:NLEV)) & -! & + SUM(ZMFD0(JROF,1:NLEV)*ZMFD(JROF,1:NLEV)) & -! & + SUM(ZTENI_T0(JROF,1:NLEV)*ZTENI_T(JROF,1:NLEV)) & -! & + SUM(ZTENI_Q0(JROF,1:NLEV)*ZTENI_Q(JROF,1:NLEV)) & -! & + SUM(ZTENI_L0(JROF,1:NLEV)*ZTENI_L(JROF,1:NLEV)) & -! & + SUM(ZTENI_I0(JROF,1:NLEV)*ZTENI_I(JROF,1:NLEV)) & -! & + SUM(ZSUPSAT0(JROF,1:NLEV)*ZSUPSAT(JROF,1:NLEV)) -! ! Third norm -! ! Note the machine precision is defined here as strictly 64bits -! ! as we assume at worst 12 digits agreements in norms. -! 
IF (ZNORM2(JROF) == 0._JPRB ) THEN -! ZNORM3(JROF)=ABS(ZNORM1(JROF)-ZNORM2(JROF))/EPSILON(1._8) -! ELSE -! ZNORM3(JROF)=ABS(ZNORM1(JROF)-ZNORM2(JROF))/EPSILON(1._8)/ZNORM2(JROF) -! ENDIF -! ENDDO -! -! ZNORMG=MAX(ZNORMG,MAXVAL(ZNORM3(1:ICEND))) + ! Second norm + DO JROF=1,ICEND + ZNORM2(JROF)=SUM(ZAPH0(JROF,1:NLEV+1)*ZAPH(JROF,1:NLEV+1)) & + & + SUM(ZAP0(JROF,1:NLEV)*ZAP(JROF,1:NLEV)) & + & + SUM(ZQ0(JROF,1:NLEV)*ZQ(JROF,1:NLEV)) & + & + SUM(ZZQSAT0(JROF,1:NLEV)*ZZQSAT(JROF,1:NLEV)) & + & + SUM(ZT0(JROF,1:NLEV)*ZT(JROF,1:NLEV)) & + & + SUM(ZL0(JROF,1:NLEV)*ZL(JROF,1:NLEV)) & + & + SUM(ZI0(JROF,1:NLEV)*ZI(JROF,1:NLEV)) & + & + SUM(ZLUDE0(JROF,1:NLEV)*ZLUDE(JROF,1:NLEV)) & + & + SUM(ZLU0(JROF,1:NLEV)*ZLU(JROF,1:NLEV)) & + & + SUM(ZMFU0(JROF,1:NLEV)*ZMFU(JROF,1:NLEV)) & + & + SUM(ZMFD0(JROF,1:NLEV)*ZMFD(JROF,1:NLEV)) & + & + SUM(ZTENI_T0(JROF,1:NLEV)*ZTENI_T(JROF,1:NLEV)) & + & + SUM(ZTENI_Q0(JROF,1:NLEV)*ZTENI_Q(JROF,1:NLEV)) & + & + SUM(ZTENI_L0(JROF,1:NLEV)*ZTENI_L(JROF,1:NLEV)) & + & + SUM(ZTENI_I0(JROF,1:NLEV)*ZTENI_I(JROF,1:NLEV)) & + & + SUM(ZSUPSAT0(JROF,1:NLEV)*ZSUPSAT(JROF,1:NLEV)) + ! Third norm + ! Note the machine precision is defined here as strictly 64bits + ! as we assume at worst 12 digits agreements in norms. + IF (ZNORM2(JROF) == 0._JPRB ) THEN + ZNORM3(JROF)=ABS(ZNORM1(JROF)-ZNORM2(JROF))/EPSILON(1._8) + ELSE + ZNORM3(JROF)=ABS(ZNORM1(JROF)-ZNORM2(JROF))/EPSILON(1._8)/ZNORM2(JROF) + ENDIF + ENDDO + + ZNORMG=MAX(ZNORMG,MAXVAL(ZNORM3(1:ICEND))) ! Log number of columns processed by this thread CALL TIMER%THREAD_LOG(TID, IGPC=ICEND) @@ -297,16 +297,16 @@ SUBROUTINE CLOUDSC_DRIVER_AD( & CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, ZHPM, NGPTOT) -! ! Print final test results -! print *, ' AD TEST ' -! print *, ' The maximum error is ',ZNORMG,' times the zero of the machine. ' -! print *, ' ============================= ' -! IF (ZNORMG < 10000._JPRB) THEN -! print *, ' = TEST OK = ' -! ELSE -! print *, ' = TEST FAILED = ' -! ENDIF -! 
print *, ' ============================= ' + ! Print final test results + print *, ' AD TEST ' + print *, ' The maximum error is ',ZNORMG,' times the zero of the machine. ' + print *, ' ============================= ' + IF (ZNORMG < 10000._JPRB) THEN + print *, ' = TEST OK = ' + ELSE + print *, ' = TEST FAILED = ' + ENDIF + print *, ' ============================= ' END SUBROUTINE CLOUDSC_DRIVER_AD diff --git a/src/cloudsc2_nl/dwarf_cloudsc.F90 b/src/cloudsc2_nl/dwarf_cloudsc.F90 index 89f1a41..7dcff77 100644 --- a/src/cloudsc2_nl/dwarf_cloudsc.F90 +++ b/src/cloudsc2_nl/dwarf_cloudsc.F90 @@ -122,7 +122,7 @@ PROGRAM DWARF_CLOUDSC & YRCST, YRTHF, YRPHNC, YREPHLI, YRECLD, YRECLDP) ! Validate the output against serialized reference data -!CALL GLOBAL_STATE%VALIDATE(NPROMA, NGPTOT, NGPTOTG) +CALL GLOBAL_STATE%VALIDATE(NPROMA, NGPTOT, NGPTOTG) IF (WRITE_REFERENCE == '1') THEN CALL GLOBAL_STATE%WRITE_REFERENCE(NPROMA) diff --git a/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 b/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 index 4293173..a244d91 100644 --- a/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 +++ b/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 @@ -248,27 +248,27 @@ SUBROUTINE CLOUDSC_DRIVER_TL( & & PFHPSL5(:,:), PFHPSN5(:,:), PCOVPTOT5(:,:), & & YDCST, YDTHF, YHNC, YPHLI, YCLD, YCLDP) -! ! Compute final test norm -! ZCOUNT=0._JPRB -! ZNORM= 0._JPRB -! CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%T, ZTENO_T5, ZTENO_T, ZNORM, ZCOUNT, ZLAMBDA) -! CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%Q, ZTENO_Q5, ZTENO_Q, ZNORM, ZCOUNT, ZLAMBDA) -! CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%CLD(:,:,NCLDQL), ZTENO_L5, ZTENO_L, ZNORM, ZCOUNT, ZLAMBDA) -! CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%CLD(:,:,NCLDQI), ZTENO_I5, ZTENO_I, ZNORM, ZCOUNT, ZLAMBDA) -! CALL ERROR_NORM(ICEND, PA(:,:,IBL), PA5, ZCLC, ZNORM, ZCOUNT, ZLAMBDA) -! CALL ERROR_NORM(ICEND, PFPLSL(:,:,IBL), PFPLSL5, ZFPLSL, ZNORM, ZCOUNT, ZLAMBDA) -! CALL ERROR_NORM(ICEND, PFPLSN(:,:,IBL), PFPLSN5, ZFPLSN, ZNORM, ZCOUNT, ZLAMBDA) -! 
CALL ERROR_NORM(ICEND, PFHPSL(:,:,IBL), PFHPSL5, ZFHPSL, ZNORM, ZCOUNT, ZLAMBDA) -! CALL ERROR_NORM(ICEND, PFHPSN(:,:,IBL), PFHPSN5, ZFHPSN, ZNORM, ZCOUNT, ZLAMBDA) -! CALL ERROR_NORM(ICEND, PCOVPTOT(:,:,IBL), PCOVPTOT5, ZCOVPTOT, ZNORM, ZCOUNT, ZLAMBDA) -! -! ! Global norm (normalize by number of active statistics) -! IF (ZNORM == 0._JPRB .OR. ZCOUNT == 0._JPRB) THEN -! print *, ' TL is totally wrong !!! ',ZNORM,ZCOUNT -! stop -! ELSE -! ZNORMG(ILAM)=MAX(ZNORMG(ILAM),ZNORM/ZCOUNT) -! ENDIF + ! Compute final test norm + ZCOUNT=0._JPRB + ZNORM= 0._JPRB + CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%T, ZTENO_T5, ZTENO_T, ZNORM, ZCOUNT, ZLAMBDA) + CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%Q, ZTENO_Q5, ZTENO_Q, ZNORM, ZCOUNT, ZLAMBDA) + CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%CLD(:,:,NCLDQL), ZTENO_L5, ZTENO_L, ZNORM, ZCOUNT, ZLAMBDA) + CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%CLD(:,:,NCLDQI), ZTENO_I5, ZTENO_I, ZNORM, ZCOUNT, ZLAMBDA) + CALL ERROR_NORM(ICEND, PA(:,:,IBL), PA5, ZCLC, ZNORM, ZCOUNT, ZLAMBDA) + CALL ERROR_NORM(ICEND, PFPLSL(:,:,IBL), PFPLSL5, ZFPLSL, ZNORM, ZCOUNT, ZLAMBDA) + CALL ERROR_NORM(ICEND, PFPLSN(:,:,IBL), PFPLSN5, ZFPLSN, ZNORM, ZCOUNT, ZLAMBDA) + CALL ERROR_NORM(ICEND, PFHPSL(:,:,IBL), PFHPSL5, ZFHPSL, ZNORM, ZCOUNT, ZLAMBDA) + CALL ERROR_NORM(ICEND, PFHPSN(:,:,IBL), PFHPSN5, ZFHPSN, ZNORM, ZCOUNT, ZLAMBDA) + CALL ERROR_NORM(ICEND, PCOVPTOT(:,:,IBL), PCOVPTOT5, ZCOVPTOT, ZNORM, ZCOUNT, ZLAMBDA) + + ! Global norm (normalize by number of active statistics) + IF (ZNORM == 0._JPRB .OR. ZCOUNT == 0._JPRB) THEN + print *, ' TL is totally wrong !!! ',ZNORM,ZCOUNT + stop + ELSE + ZNORMG(ILAM)=MAX(ZNORMG(ILAM),ZNORM/ZCOUNT) + ENDIF ENDDO ! end of lambda loops @@ -288,46 +288,46 @@ SUBROUTINE CLOUDSC_DRIVER_TL( & CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, ZHPM, NGPTOT) -! ! Evaluate the test and print the otput -! print *, ' TL Taylor test ' -! print *, ' Lambda Result' -! istart=0 -! DO ILAM=1,10 -! print *, ILAM, ZNORMG(ILAM) -! ! Redefine ZNORMG -! 
ZNORMG(ILAM)=ABS(1._JPRB - ZNORMG(ILAM)) -! ! filter out first members with strong NL departures -! if (istart == 0 .AND. ZNORMG(ILAM) < 0.5_JPRB ) istart=ILAM -! ENDDO -! -! print *, ' ============================================== ' -! IF (ISTART == 0 .OR. ISTART > 4 ) THEN -! print *, ' TEST FAILLED, err 13 ' -! ELSE -! ! V-shape test -! ITEST=-10 -! INEGAT=1 -! DO ILAM=ISTART,10-1 -! IF (ZNORMG(ILAM+1)/ZNORMG(ILAM) < 1._JPRB ) THEN -! ITEMPNEGAT = 1 -! ELSE -! ITEMPNEGAT = 0 -! ENDIF -! IF (INEGAT > ITEMPNEGAT) ITEST=ITEST+10 -! INEGAT=ITEMPNEGAT -! ENDDO -! IF (ITEST == -10) ITEST = 11 ! no change of sign at all -! ! Accuracy test -! IF (MINVAL(ZNORMG(ISTART:10)) > 0.00001_JPRB) ITEST=ITEST+7 ! Hard limit -! IF (MINVAL(ZNORMG(ISTART:10)) > 0.000001_JPRB) ITEST=ITEST+5 ! Soft limit -! ! Final prints -! IF (ITEST > 5) THEN -! print *, ' TEST FAILLED, err ',ITEST -! ELSE -! print *, ' TEST PASSED, penalty ',ITEST -! ENDIF -! ENDIF -! print *, ' ============================================== ' + ! Evaluate the test and print the otput + print *, ' TL Taylor test ' + print *, ' Lambda Result' + istart=0 + DO ILAM=1,10 + print *, ILAM, ZNORMG(ILAM) + ! Redefine ZNORMG + ZNORMG(ILAM)=ABS(1._JPRB - ZNORMG(ILAM)) + ! filter out first members with strong NL departures + if (istart == 0 .AND. ZNORMG(ILAM) < 0.5_JPRB ) istart=ILAM + ENDDO + + print *, ' ============================================== ' + IF (ISTART == 0 .OR. ISTART > 4 ) THEN + print *, ' TEST FAILLED, err 13 ' + ELSE + ! V-shape test + ITEST=-10 + INEGAT=1 + DO ILAM=ISTART,10-1 + IF (ZNORMG(ILAM+1)/ZNORMG(ILAM) < 1._JPRB ) THEN + ITEMPNEGAT = 1 + ELSE + ITEMPNEGAT = 0 + ENDIF + IF (INEGAT > ITEMPNEGAT) ITEST=ITEST+10 + INEGAT=ITEMPNEGAT + ENDDO + IF (ITEST == -10) ITEST = 11 ! no change of sign at all + ! Accuracy test + IF (MINVAL(ZNORMG(ISTART:10)) > 0.00001_JPRB) ITEST=ITEST+7 ! Hard limit + IF (MINVAL(ZNORMG(ISTART:10)) > 0.000001_JPRB) ITEST=ITEST+5 ! Soft limit + ! 
Final prints + IF (ITEST > 5) THEN + print *, ' TEST FAILLED, err ',ITEST + ELSE + print *, ' TEST PASSED, penalty ',ITEST + ENDIF + ENDIF + print *, ' ============================================== ' END SUBROUTINE CLOUDSC_DRIVER_TL