diff --git a/CMakeLists.txt b/CMakeLists.txt index eb26466..98fbefd 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,8 +73,18 @@ if( HAVE_HDF5 ) list(APPEND CLOUDSC_DEFINITIONS HAVE_HDF5 ) endif() + +### Loki ecbuild_find_package( NAME loki ) +# Add option for single-precision builds +ecbuild_add_option( FEATURE SINGLE_PRECISION + DESCRIPTION "Build CLOUDSC in single precision" DEFAULT OFF +) +if( HAVE_SINGLE_PRECISION ) + list(APPEND CLOUDSC_DEFINITIONS SINGLE) +endif() + # build executables add_subdirectory(src) diff --git a/arch/cscs/daint/intel/6.0.10/env.sh b/arch/cscs/daint/intel/6.0.10/env.sh new file mode 100644 index 0000000..9294001 --- /dev/null +++ b/arch/cscs/daint/intel/6.0.10/env.sh @@ -0,0 +1,41 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module purge -f + +# Load modules +module load daint-gpu +module load PrgEnv-intel/6.0.10 +module swap intel/2021.3.0 intel-classic/2022.1.0 +#module load Boost +module load CMake +module load cudatoolkit/11.2.0_3.39-2.1__gf93aa1c # needed for cmake to find hdf5 +module load cray-hdf5-parallel +module load cray-python + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/cscs/daint/intel/6.0.10/toolchain.cmake b/arch/cscs/daint/intel/6.0.10/toolchain.cmake new file mode 100644 index 0000000..0b4f921 --- /dev/null +++ b/arch/cscs/daint/intel/6.0.10/toolchain.cmake @@ -0,0 +1,149 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +#################################################################### +# ARCHITECTURE +#################################################################### + +set( EC_HAVE_C_INLINE 1 ) +set( EC_HAVE_FUNCTION_DEF 1 ) +set( EC_HAVE_CXXABI_H 1 ) +set( EC_HAVE_CXX_BOOL 1 ) +set( EC_HAVE_CXX_SSTREAM 1 ) +set( EC_HAVE_CXX_INT_128 0 ) +set( CMAKE_SIZEOF_VOID_P 8 ) +set( EC_SIZEOF_PTR 8 ) +set( EC_SIZEOF_CHAR 1 ) +set( EC_SIZEOF_SHORT 2 ) +set( EC_SIZEOF_INT 4 ) +set( EC_SIZEOF_LONG 8 ) +set( EC_SIZEOF_LONG_LONG 8 ) +set( EC_SIZEOF_FLOAT 4 ) +set( EC_SIZEOF_DOUBLE 8 ) +set( EC_SIZEOF_LONG_DOUBLE 8 ) +set( EC_SIZEOF_SIZE_T 8 ) +set( EC_SIZEOF_SSIZE_T 8 ) +set( EC_SIZEOF_OFF_T 8 ) +set( EC_BIG_ENDIAN 0 ) +set( EC_LITTLE_ENDIAN 1 ) +set( IEEE_BE 0 ) +set( IEEE_LE 1 ) +set( EC_HAVE_FSEEK 1 ) +set( EC_HAVE_FSEEKO 1 ) +set( EC_HAVE_FTELLO 1 ) +set( EC_HAVE_LSEEK 0 ) +set( EC_HAVE_FTRUNCATE 0 ) +set( EC_HAVE_OPEN 0 ) +set( EC_HAVE_FOPEN 1 ) +set( EC_HAVE_FMEMOPEN 1 ) +set( EC_HAVE_FUNOPEN 0 ) +set( EC_HAVE_FLOCK 1 ) +set( EC_HAVE_MMAP 1 ) +set( EC_HAVE_POSIX_MEMALIGN 1 ) +set( EC_HAVE_F_GETLK 1 ) +set( EC_HAVE_F_SETLK 1 ) +set( EC_HAVE_F_SETLKW 1 ) +set( EC_HAVE_F_GETLK64 1 ) +set( EC_HAVE_F_SETLK64 1 ) +set( EC_HAVE_F_SETLKW64 1 ) +set( EC_HAVE_MAP_ANONYMOUS 1 ) +set( EC_HAVE_MAP_ANON 1 ) +set( EC_HAVE_ASSERT_H 1 ) +set( EC_HAVE_STDLIB_H 1 ) +set( EC_HAVE_UNISTD_H 1 ) +set( EC_HAVE_STRING_H 1 ) +set( EC_HAVE_STRINGS_H 1 ) +set( EC_HAVE_SYS_STAT_H 1 ) +set( EC_HAVE_SYS_TIME_H 1 ) +set( EC_HAVE_SYS_TYPES_H 1 ) +set( EC_HAVE_MALLOC_H 1 ) +set( EC_HAVE_SYS_MALLOC_H 0 ) +set( EC_HAVE_SYS_PARAM_H 1 ) +set( EC_HAVE_SYS_MOUNT_H 1 ) +set( EC_HAVE_SYS_VFS_H 1 ) +set( EC_HAVE_OFFT 1 ) +set( EC_HAVE_OFF64T 1 ) +set( EC_HAVE_STRUCT_STAT 1 ) +set( EC_HAVE_STRUCT_STAT64 1 ) +set( EC_HAVE_STAT 1 ) +set( EC_HAVE_STAT64 1 ) +set( EC_HAVE_FSTAT 1 ) +set( EC_HAVE_FSTAT64 1 ) +set( EC_HAVE_FSEEKO64 1 ) +set( EC_HAVE_FTELLO64 1 ) +set( EC_HAVE_LSEEK64 1 ) +set( EC_HAVE_OPEN64 1 ) +set( 
EC_HAVE_FOPEN64 1 ) +set( EC_HAVE_FTRUNCATE64 1 ) +set( EC_HAVE_FLOCK64 1 ) +set( EC_HAVE_MMAP64 1 ) +set( EC_HAVE_STRUCT_STATVFS 1 ) +set( EC_HAVE_STRUCT_STATVFS64 1 ) +set( EC_HAVE_FOPENCOOKIE 1 ) +set( EC_HAVE_FSYNC 1 ) +set( EC_HAVE_FDATASYNC 1 ) +set( EC_HAVE_DIRFD 1 ) +set( EC_HAVE_SYSPROC 0 ) +set( EC_HAVE_SYSPROCFS 1 ) +set( EC_HAVE_EXECINFO_BACKTRACE 1 ) +set( EC_HAVE_GMTIME_R 1 ) +set( EC_HAVE_GETPWUID_R 1 ) +set( EC_HAVE_GETPWNAM_R 1 ) +set( EC_HAVE_READDIR_R 1 ) +set( EC_HAVE_DIRENT_D_TYPE 1 ) +set( EC_HAVE_GETHOSTBYNAME_R 1 ) +set( EC_HAVE_ATTRIBUTE_CONSTRUCTOR 1 ) +set( EC_ATTRIBUTE_CONSTRUCTOR_INITS_ARGV 0 ) +set( EC_HAVE_PROCFS 1 ) +set( EC_HAVE_DLFCN_H 1 ) +set( EC_HAVE_DLADDR 1 ) +set( EC_HAVE_AIOCB 1 ) +set( EC_HAVE_AIOCB64 1 ) + +# Disable relative rpaths as aprun does not respect it +set( ENABLE_RELATIVE_RPATHS OFF CACHE STRING "Disable relative rpaths" FORCE ) + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) +set( ECBUILD_TRUST_FLAGS ON ) + +#################################################################### +# Compiler FLAGS +#################################################################### + +# General Flags (add to default) + +set(ECBUILD_Fortran_FLAGS "-g") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -qopenmp-threadprivate compat") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume byterecl") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -convert big_endian") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -traceback") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -align array64byte") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -warn nounused,nouncalled") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -march=core-avx2") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-functions") +#set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} 
-finline-limit=1500") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Winline") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -no-fma") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume realloc_lhs") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fp-model precise") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ftz") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fp-speculation=safe") +#set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fast-transcendentals") + +#################################################################### +# LINK FLAGS +#################################################################### + +set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp" ) +set( ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap" ) +set( ECBUILD_EXE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap -Wl,--as-needed" ) +set( ECBUILD_CXX_IMPLICIT_LINK_LIBRARIES "${LIBCRAY_CXX_RTS}" CACHE STRING "" ) diff --git a/arch/cscs/daint/nvidia/6.0.10/env.sh b/arch/cscs/daint/nvidia/6.0.10/env.sh new file mode 100644 index 0000000..888d3ec --- /dev/null +++ b/arch/cscs/daint/nvidia/6.0.10/env.sh @@ -0,0 +1,45 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module purge -f + +# Load modules +module load daint-gpu +module load PrgEnv-nvidia/6.0.10 +module swap nvidia/21.3 nvidia/22.5 +#module load Boost +module load CMake +module load cudatoolkit/11.2.0_3.39-2.1__gf93aa1c # needed for cmake to find hdf5 +module load cray-hdf5-parallel +module load cray-python + +# Increase stack size to maximum +ulimit -S -s unlimited + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" +export BOOST_ROOT=/users/subbiali/boost/1.82.0 diff --git a/arch/cscs/daint/nvidia/6.0.10/toolchain.cmake b/arch/cscs/daint/nvidia/6.0.10/toolchain.cmake new file mode 100644 index 0000000..5ec011b --- /dev/null +++ b/arch/cscs/daint/nvidia/6.0.10/toolchain.cmake @@ -0,0 +1,53 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) + +#################################################################### +# OpenAcc FLAGS +#################################################################### + +# NOTE(review): other NVHPC toolchains add `-mp` here to avoid undefined symbols during linking +# (smells like an Nvidia bug); the flags below do not contain it - confirm this is intended +set( OpenACC_Fortran_FLAGS "-acc=gpu -gpu=cc60,lineinfo,fastmath" CACHE STRING "" ) +# Enable this to get more detailed compiler output +# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) + +#################################################################### +# CUDA FLAGS +#################################################################### + +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 60) +endif() +if(NOT DEFINED CMAKE_CUDA_COMPILER) + set(CMAKE_CUDA_COMPILER /opt/nvidia/hpc_sdk/Linux_x86_64/21.3/compilers/bin/nvcc) +endif() + +#################################################################### +# COMMON FLAGS +#################################################################### + +set(ECBUILD_Fortran_FLAGS "-fpic") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") + +set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) + +set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) + +set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) diff --git a/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh b/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh new file mode 100644 index 0000000..2cc0f9b --- /dev/null +++ 
b/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh @@ -0,0 +1,49 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load PrgEnv-cray/8.3.3 +module_load LUMI/22.08 +# module_load partition/G +module_load rocm/5.0.2 +module_load cce/14.0.2 +module_load cray-libsci/22.08.1.1 +module_load cray-mpich/8.1.18 +module_load craype/2.7.17 +module_load craype-accel-amd-gfx90a +module_load buildtools/22.08 +module_load cray-hdf5/1.12.1.5 +module_load cray-python/3.9.12.1 + +module list + +set -x + +export CC=cc CXX=CC FC=ftn + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake new file mode 100644 index 0000000..0774cf5 --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake @@ -0,0 +1,42 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI OFF ) +set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) + +#################################################################### +# OpenMP FLAGS +#################################################################### + +set( ENABLE_OMP ON CACHE STRING "" ) +set( OpenMP_C_FLAGS "-homp" CACHE STRING "" ) +set( OpenMP_Fortran_FLAGS "-homp" CACHE STRING "" ) + +#################################################################### +# OpenACC FLAGS +#################################################################### + +set( ENABLE_ACC ON CACHE STRING "" ) +set( OpenACC_C_FLAGS "-hacc" ) +set( OpenACC_CXX_FLAGS "-hacc" ) +set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" ) + +#################################################################### +# Compiler FLAGS +#################################################################### + +# General Flags (add to default) +set(ECBUILD_Fortran_FLAGS "-hcontiguous") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed") + +set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh new file mode 100644 index 0000000..9a66e15 --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh @@ -0,0 +1,51 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load PrgEnv-cray/8.3.3 +module_load LUMI/23.03 +# module_load partition/G +module_load rocm/5.2.3 +module_load cce/15.0.1 +module_load cray-libsci/22.08.1.1 +module_load cray-mpich/8.1.18 +module_load craype/2.7.20 +module_load craype-accel-amd-gfx90a +module_load buildtools/23.03 +module_load cray-hdf5/1.12.1.5 +module_load cray-python/3.9.12.1 +module_load Boost/1.81.0-cpeCray-23.03 +module_load partition/G + +module list + +set -x + +export CC=cc CXX=CC FC=ftn + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake new file mode 100644 index 0000000..d9c08be --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake @@ -0,0 +1,58 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+ +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI OFF ) +set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) + +#################################################################### +# OpenMP FLAGS +#################################################################### + +set( ENABLE_OMP ON CACHE STRING "" ) +set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_Fortran_FLAGS "-fopenmp -hlist=aimd" CACHE STRING "" ) + +set( OpenMP_C_LIB_NAMES "craymp" ) +set( OpenMP_CXX_LIB_NAMES "craymp" ) +set( OpenMP_Fortran_LIB_NAMES "craymp" ) +set( OpenMP_craymp_LIBRARY "/opt/cray/pe/cce/15.0.1/cce/x86_64/lib/libcraymp.so" ) + +#################################################################### +# OpenACC FLAGS +#################################################################### + +set( ENABLE_ACC ON CACHE STRING "" ) +set( OpenACC_C_FLAGS "-hacc" ) +set( OpenACC_CXX_FLAGS "-hacc" ) +set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" ) + +#################################################################### +# HIP FLAGS +#################################################################### + +set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -O3 -ffast-math") +if(NOT DEFINED CMAKE_HIP_ARCHITECTURES) + set(CMAKE_HIP_ARCHITECTURES gfx90a) +endif() + +#################################################################### +# Compiler FLAGS +#################################################################### + +# General Flags (add to default) +set(ECBUILD_Fortran_FLAGS "-hcontiguous") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed") + +set(ECBUILD_Fortran_FLAGS_BIT "-O3 -G2 -haggress -DNDEBUG") +# set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") diff --git 
a/arch/eurohpc/meluxina/nvhpc/22.7/env.sh b/arch/eurohpc/meluxina/nvhpc/22.7/env.sh new file mode 100644 index 0000000..326a341 --- /dev/null +++ b/arch/eurohpc/meluxina/nvhpc/22.7/env.sh @@ -0,0 +1,52 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module --force purge + +# Load modules +module_load env/release/2022.1 +module_load CUDA/11.7.0 +module_load NVHPC/22.7-CUDA-11.7.0 +module_load OpenMPI/4.1.4-GCC-11.3.0 +module_load CMake +module_load Boost +module_load Python +#module_load HDF5 + +export CC=nvc +export CXX=nvc++ +export F77=nvfortran +export FC=nvfortran +export F90=nvfortran + +export HDF5_ROOT=/project/home/p200177/nasu/hdf5/1.14.1-2/build/release/2022.1/nvhpc/22.7/ + +# Increase stack size to maximum +ulimit -S -s unlimited + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/meluxina/nvhpc/22.7/toolchain.cmake b/arch/eurohpc/meluxina/nvhpc/22.7/toolchain.cmake new file mode 100644 index 0000000..ce8de9d --- /dev/null +++ b/arch/eurohpc/meluxina/nvhpc/22.7/toolchain.cmake @@ -0,0 +1,57 @@ +# (C) Copyright 1988- ECMWF. 
+# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) + +#################################################################### +# OpenMP FLAGS +#################################################################### + +# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module +# unless its stored as a cache variable +set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" ) + +# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to +# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone +# doesn't have any effect here as the module uses FORCE to overwrite the +# existing value) +set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) +set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") + +#################################################################### +# OpenAcc FLAGS +#################################################################### + +# NB: We have to add `-mp` again to avoid undefined symbols during linking +# (smells like an Nvidia bug) +set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" ) +# Enable this to get more detailed compiler output +# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) + +#################################################################### +# COMMON FLAGS +#################################################################### + +set(ECBUILD_Fortran_FLAGS "-fpic") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") 
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") + +set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) + +set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) + +set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) diff --git a/bundle.yml b/bundle.yml index ab14ad1..e21e513 100755 --- a/bundle.yml +++ b/bundle.yml @@ -5,12 +5,13 @@ name : cloudsc-bundle version : 1.0.0-develop cmake : > CMAKE_LINK_DEPENDS_NO_SHARED=ON + ENABLE_OMP=ON projects : - ecbuild : git : https://github.com/ecmwf/ecbuild - version : 3.7.0 + version : 3.8.0 bundle : false - loki : @@ -35,6 +36,10 @@ options : help : Specify compiler options via supplied toolchain file cmake : CMAKE_TOOLCHAIN_FILE={{value}} + - single-precision : + help : Enable single precision build of the dwarf + cmake : ENABLE_SINGLE_PRECISION=ON + - with-loki : help : Enable Loki/CLAW source-to-source transformations cmake : > diff --git a/cloudsc-bundle b/cloudsc-bundle index 771ac48..8e8ccbe 100755 --- a/cloudsc-bundle +++ b/cloudsc-bundle @@ -25,15 +25,11 @@ shift BUNDLE_DIR="$( cd $( dirname "${BASH_SOURCE[0]}" ) && pwd -P )" -if [[ -z "${BITBUCKET}" ]]; then - export BITBUCKET=ssh://git@git.ecmwf.int -fi - # Download ecbundle scripts if not already available command_exists () { type "$1" &> /dev/null ; } if ! command_exists ${BOOTSTRAPPED} ; then if [[ ! 
-d ${BUNDLE_DIR}/ecbundle ]]; then - git clone ${BITBUCKET}/escape/ecbundle.git ${BUNDLE_DIR}/ecbundle + git clone https://github.com/ecmwf/ecbundle.git ${BUNDLE_DIR}/ecbundle ( cd ${BUNDLE_DIR}/ecbundle && git checkout ${ecbundle_VERSION} ) fi export PATH=${BUNDLE_DIR}/ecbundle/bin:${PATH} diff --git a/src/cloudsc2_ad_loki/CMakeLists.txt b/src/cloudsc2_ad_loki/CMakeLists.txt index d7a4dde..6dc2ee4 100755 --- a/src/cloudsc2_ad_loki/CMakeLists.txt +++ b/src/cloudsc2_ad_loki/CMakeLists.txt @@ -21,20 +21,21 @@ if( HAVE_CLOUDSC2_AD_LOKI ) ## * Internal "do-nothing" mode for Loki debug ## #################################################### - - loki_transform_convert( - MODE idem FRONTEND ${LOKI_FRONTEND} + loki_transform( + COMMAND convert + MODE idem + FRONTEND ${LOKI_FRONTEND} CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 INCLUDES ${COMMON_INCLUDE} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-idem OUTPUT - loki-idem/satur.idem.F90 - loki-idem/cloudsc2ad.idem.F90 - loki-idem/cloudsc2tl.idem.F90 - loki-idem/cloudsc_driver_ad_loki_mod.idem.F90 - DEPENDS satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 + loki-idem/satur.idem.F90 + loki-idem/cloudsc2ad.idem.F90 + loki-idem/cloudsc2tl.idem.F90 + loki-idem/cloudsc_driver_ad_loki_mod.idem.F90 + DEPENDS satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 ) ecbuild_add_executable( TARGET dwarf-cloudsc2-ad-loki-idem @@ -57,50 +58,58 @@ if( HAVE_CLOUDSC2_AD_LOKI ) OMP 1 ) - #################################################### - ## "Single Column Coalesced" (SCC) mode ## - ## * Removes horizontal vector loops ## - ## * Invokes compute kernel as `!$acc vector` ## - #################################################### - - - loki_transform_convert( - MODE scc FRONTEND ${LOKI_FRONTEND} CPP - CONFIG 
${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc - OUTPUT - loki-scc/satur.scc.F90 - loki-scc/cloudsc2ad.scc.F90 - loki-scc/cloudsc2tl.scc.F90 - loki-scc/cloudsc_driver_ad_loki_mod.scc.F90 - DEPENDS - satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 - ) - ecbuild_add_executable( TARGET dwarf-cloudsc2-ad-loki-scc - SOURCES - dwarf_cloudsc.F90 - loki-scc/cloudsc_driver_ad_loki_mod.scc.F90 - loki-scc/cloudsc2tl.scc.F90 - loki-scc/cloudsc2ad.scc.F90 - loki-scc/satur.scc.F90 - LIBS - cloudsc2-common-lib - DEFINITIONS ${CLOUDSC_DEFINITIONS} - ) - ecbuild_add_test( - TARGET dwarf-cloudsc2-ad--loki-scc-serial - COMMAND bin/dwarf-cloudsc2-ad-loki-scc - ARGS - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. - OMP 1 - ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G - ) + #################################################### + ## "Single Column Coalesced" (SCC) mode ## + ## * Removes horizontal vector loops ## + ## * Invokes compute kernel as `!$acc vector` ## + #################################################### + + loki_transform( + COMMAND convert + MODE scc + FRONTEND ${LOKI_FRONTEND} + DIRECTIVE openacc + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 + INCLUDES ${COMMON_INCLUDE} + DEFINITIONS CLOUDSC_GPU_TIMING + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc + OUTPUT + loki-scc/satur.scc.F90 + loki-scc/cloudsc2ad.scc.F90 + loki-scc/cloudsc2tl.scc.F90 + loki-scc/cloudsc_driver_ad_loki_mod.scc.F90 + DEPENDS + satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP + ) + + ecbuild_add_executable( TARGET dwarf-cloudsc2-ad-loki-scc + SOURCES + 
dwarf_cloudsc.F90 + loki-scc/cloudsc_driver_ad_loki_mod.scc.F90 + loki-scc/cloudsc2tl.scc.F90 + loki-scc/cloudsc2ad.scc.F90 + loki-scc/satur.scc.F90 + LIBS + cloudsc2-common-lib + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc2-ad--loki-scc-serial + COMMAND bin/dwarf-cloudsc2-ad-loki-scc + ARGS + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G + ) + + #################################################### ## SCC-hoist mode ## ## * SCC with vector loop hoisted ## @@ -108,15 +117,17 @@ if( HAVE_CLOUDSC2_AD_LOKI ) ## * Temporary arrays hoisted to driver ## #################################################### - loki_transform_convert( - MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + MODE scc-hoist + FRONTEND ${LOKI_FRONTEND} + DIRECTIVE openacc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 INCLUDES ${COMMON_INCLUDE} DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist OUTPUT loki-scc-hoist/satur.scc_hoist.F90 loki-scc-hoist/cloudsc2ad.scc_hoist.F90 @@ -124,7 +135,11 @@ if( HAVE_CLOUDSC2_AD_LOKI ) loki-scc-hoist/cloudsc_driver_ad_loki_mod.scc_hoist.F90 DEPENDS satur.F90 cloudsc2ad.F90 cloudsc2tl.F90 cloudsc_driver_ad_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP ) + ecbuild_add_executable( TARGET dwarf-cloudsc2-ad-loki-scc-hoist SOURCES dwarf_cloudsc.F90 diff --git a/src/cloudsc2_ad_loki/cloudsc_loki.config b/src/cloudsc2_ad_loki/cloudsc_loki.config index f525adc..0b824a2 100644 --- a/src/cloudsc2_ad_loki/cloudsc_loki.config +++ b/src/cloudsc2_ad_loki/cloudsc_loki.config @@ -14,41 +14,40 @@ disable = ['performance_timer%start', 'performance_timer%end', 'performance_time 
'performance_timer%thread_end', 'performance_timer%thread_log', 'performance_timer%thread_log', 'performance_timer%print_performance'] + # Define entry point for call-tree transformation -[[routine]] -name = 'cloudsc_driver_ad' -expand = true -role = 'driver' +[routines] -[[routine]] -name = 'cloudsc2ad' -role = 'kernel' -expand = true +[routines.cloudsc_driver_ad] + role = 'driver' + expand = true -[[routine]] -name = 'cloudsc2tl' -role = 'kernel' -expand = true +[routines.cloudsc2ad] + role = 'kernel' + expand = true -[[routine]] -name = 'satur' -role = 'kernel' -expand = true -############################################## - -[[dimension]] -name = 'horizontal' -size = 'KLON' -index = 'JL' -bounds = ['KIDIA', 'KFDIA'] -aliases = ['NPROMA', 'KDIM%KLON'] - -[[dimension]] -name = 'vertical' -size = 'KLEV' -index = 'JK' - -[[dimension]] -name = 'block_dim' -size = 'NGPBLKS' -index = 'IBL' +[routines.cloudsc2tl] + role = 'kernel' + expand = true + +[routines.satur] + role = 'kernel' + expand = true + + +# Define indices and bounds for array dimensions +[dimensions] + +[dimensions.horizontal] + size = 'KLON' + index = 'JL' + bounds = ['KIDIA', 'KFDIA'] + aliases = ['NPROMA', 'KDIM%KLON'] + +[dimensions.vertical] + size = 'KLEV' + index = 'JK' + +[dimensions.block_dim] + size = 'NGPBLKS' + index = 'IBL' diff --git a/src/cloudsc2_nl_loki/CMakeLists.txt b/src/cloudsc2_nl_loki/CMakeLists.txt index 3e8ef97..5aff52b 100755 --- a/src/cloudsc2_nl_loki/CMakeLists.txt +++ b/src/cloudsc2_nl_loki/CMakeLists.txt @@ -15,19 +15,22 @@ if( HAVE_CLOUDSC2_NL_LOKI ) set( COMMON_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/../common/include" ) set( LOKI_FRONTEND "fp" CACHE STRING "Frontend parser for Loki transforms" ) + + #################################################### ## Idempotence mode: ## ## * Internal "do-nothing" mode for Loki debug ## #################################################### - - loki_transform_convert( - MODE idem FRONTEND ${LOKI_FRONTEND} + loki_transform( + COMMAND 
convert + MODE idem + FRONTEND ${LOKI_FRONTEND} CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 INCLUDES ${COMMON_INCLUDE} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-idem OUTPUT loki-idem/satur.idem.F90 loki-idem/cloudsc2.idem.F90 loki-idem/cloudsc_driver_loki_mod.idem.F90 DEPENDS satur.F90 cloudsc2.F90 cloudsc_driver_loki_mod.F90 ) @@ -58,19 +61,22 @@ if( HAVE_CLOUDSC2_NL_LOKI ) ## * Invokes compute kernel as `!$acc vector` ## #################################################### - - loki_transform_convert( - MODE scc FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + MODE scc + FRONTEND ${LOKI_FRONTEND} DIRECTIVE openacc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 INCLUDES ${COMMON_INCLUDE} DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc OUTPUT loki-scc/satur.scc.F90 loki-scc/cloudsc2.scc.F90 loki-scc/cloudsc_driver_loki_mod.scc.F90 DEPENDS satur.F90 cloudsc2.F90 cloudsc_driver_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP ) ecbuild_add_executable( TARGET dwarf-cloudsc2-nl-loki-scc @@ -93,6 +99,7 @@ if( HAVE_CLOUDSC2_NL_LOKI ) ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G ) + #################################################### ## SCC-hoist mode ## ## * SCC with vector loop hoisted ## @@ -100,22 +107,25 @@ if( HAVE_CLOUDSC2_NL_LOKI ) ## * Temporary arrays hoisted to driver ## #################################################### - - loki_transform_convert( - MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + MODE scc-hoist + FRONTEND ${LOKI_FRONTEND} DIRECTIVE openacc CONFIG 
${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 INCLUDES ${COMMON_INCLUDE} DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist OUTPUT loki-scc-hoist/satur.scc_hoist.F90 loki-scc-hoist/cloudsc2.scc_hoist.F90 loki-scc-hoist/cloudsc_driver_loki_mod.scc_hoist.F90 - DEPENDS satur.F90 cloudsc2.F90 cloudsc_driver_loki_mod.F90 + DEPENDS satur.F90 cloudsc2.F90 cloudsc_driver_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP ) ecbuild_add_executable( TARGET dwarf-cloudsc2-nl-loki-scc-hoist @@ -137,4 +147,5 @@ if( HAVE_CLOUDSC2_NL_LOKI ) OMP 1 ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=19G ) + endif() diff --git a/src/cloudsc2_nl_loki/cloudsc_loki.config b/src/cloudsc2_nl_loki/cloudsc_loki.config index f85ab06..1d012f2 100644 --- a/src/cloudsc2_nl_loki/cloudsc_loki.config +++ b/src/cloudsc2_nl_loki/cloudsc_loki.config @@ -14,36 +14,36 @@ disable = ['performance_timer%start', 'performance_timer%end', 'performance_time 'performance_timer%thread_end', 'performance_timer%thread_log', 'performance_timer%thread_log', 'performance_timer%print_performance'] + # Define entry point for call-tree transformation -[[routine]] -name = 'cloudsc_driver' -expand = true -role = 'driver' +[routines] -[[routine]] -name = 'cloudsc2' -role = 'kernel' -expand = true +[routines.cloudsc_driver] + role = 'driver' + expand = true -[[routine]] -name = 'satur' -role = 'kernel' -expand = true -############################################## - -[[dimension]] -name = 'horizontal' -size = 'KLON' -index = 'JL' -bounds = ['KIDIA', 'KFDIA'] -aliases = ['NPROMA', 'KDIM%KLON'] - -[[dimension]] -name = 'vertical' -size = 'KLEV' -index = 'JK' - -[[dimension]] -name = 'block_dim' -size = 'NGPBLKS' -index = 'IBL' +[routines.cloudsc2] + role = 'kernel' + expand = true + 
+[routines.satur] + expand = true + role = 'kernel' + + +# Define indices and bounds for array dimensions +[dimensions] + +[dimensions.horizontal] + size = 'KLON' + index = 'JL' + bounds = ['KIDIA', 'KFDIA'] + aliases = ['NPROMA', 'KDIM%KLON'] + +[dimensions.vertical] + size = 'KLEV' + index = 'JK' + +[dimensions.block_dim] + size = 'NGPBLKS' + index = 'IBL' diff --git a/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 b/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 index bf9e350..a244d91 100644 --- a/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 +++ b/src/cloudsc2_tl/cloudsc_driver_tl_mod.F90 @@ -250,7 +250,7 @@ SUBROUTINE CLOUDSC_DRIVER_TL( & ! Compute final test norm ZCOUNT=0._JPRB - ZNORM= 0._JPRB + ZNORM= 0._JPRB CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%T, ZTENO_T5, ZTENO_T, ZNORM, ZCOUNT, ZLAMBDA) CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%Q, ZTENO_Q5, ZTENO_Q, ZNORM, ZCOUNT, ZLAMBDA) CALL ERROR_NORM(ICEND, TENDENCY_LOC(IBL)%CLD(:,:,NCLDQL), ZTENO_L5, ZTENO_L, ZNORM, ZCOUNT, ZLAMBDA) @@ -297,7 +297,7 @@ SUBROUTINE CLOUDSC_DRIVER_TL( & ! Redefine ZNORMG ZNORMG(ILAM)=ABS(1._JPRB - ZNORMG(ILAM)) ! filter out first members with strong NL departures - if (istart == 0 .AND. ZNORMG(ILAM) < 0.5_JPRB ) istart=ILAM + if (istart == 0 .AND. 
ZNORMG(ILAM) < 0.5_JPRB ) istart=ILAM ENDDO print *, ' ============================================== ' @@ -325,7 +325,7 @@ SUBROUTINE CLOUDSC_DRIVER_TL( & print *, ' TEST FAILLED, err ',ITEST ELSE print *, ' TEST PASSED, penalty ',ITEST - ENDIF + ENDIF ENDIF print *, ' ============================================== ' diff --git a/src/cloudsc2_tl_loki/CMakeLists.txt b/src/cloudsc2_tl_loki/CMakeLists.txt index 48abcdd..89965d6 100755 --- a/src/cloudsc2_tl_loki/CMakeLists.txt +++ b/src/cloudsc2_tl_loki/CMakeLists.txt @@ -15,26 +15,28 @@ if( HAVE_CLOUDSC2_TL_LOKI ) execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 ) + #################################################### ## Idempotence mode: ## ## * Internal "do-nothing" mode for Loki debug ## #################################################### - - loki_transform_convert( - MODE idem FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + MODE idem + FRONTEND ${LOKI_FRONTEND} CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} HEADERS ${COMMON_MODULE}/yomphyder.F90 INCLUDES ${COMMON_INCLUDE} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem - OUTPUT - loki-idem/satur.idem.F90 - loki-idem/cloudsc2.idem.F90 - loki-idem/cloudsc2tl.idem.F90 - loki-idem/cloudsc_driver_tl_loki_mod.idem.F90 - DEPENDS - satur.F90 cloudsc2.F90 cloudsc2tl.F90 cloudsc_driver_tl_loki_mod.F90 + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-idem + OUTPUT + loki-idem/satur.idem.F90 + loki-idem/cloudsc2.idem.F90 + loki-idem/cloudsc2tl.idem.F90 + loki-idem/cloudsc_driver_tl_loki_mod.idem.F90 + DEPENDS satur.F90 cloudsc2.F90 cloudsc2tl.F90 cloudsc_driver_tl_loki_mod.F90 + CPP ) ecbuild_add_executable( TARGET dwarf-cloudsc2-tl-loki-idem @@ -59,103 +61,111 @@ if( HAVE_CLOUDSC2_TL_LOKI ) ) - #################################################### - ## "Single 
Column Coalesced" (SCC) mode ## - ## * Removes horizontal vector loops ## - ## * Invokes compute kernel as `!$acc vector` ## - #################################################### - - - loki_transform_convert( - MODE scc FRONTEND ${LOKI_FRONTEND} CPP - CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc - OUTPUT - loki-scc/satur.scc.F90 - loki-scc/cloudsc2.scc.F90 - loki-scc/cloudsc2tl.scc.F90 - loki-scc/cloudsc_driver_tl_loki_mod.scc.F90 - DEPENDS - satur.F90 - cloudsc2.F90 - cloudsc2tl.F90 - cloudsc_driver_tl_loki_mod.F90 - ) - - ecbuild_add_executable( TARGET dwarf-cloudsc2-tl-loki-scc - SOURCES - dwarf_cloudsc.F90 - loki-scc/cloudsc_driver_tl_loki_mod.scc.F90 - loki-scc/cloudsc2tl.scc.F90 - loki-scc/cloudsc2.scc.F90 - loki-scc/satur.scc.F90 - error.F90 - LIBS - cloudsc2-common-lib - DEFINITIONS ${CLOUDSC_DEFINITIONS} - ) - - ecbuild_add_test( - TARGET dwarf-cloudsc2-tl--loki-scc-serial - COMMAND bin/dwarf-cloudsc2-tl-loki-scc - ARGS - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. 
- OMP 1 - ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G - ) - - #################################################### - ## SCC-hoist mode ## - ## * SCC with vector loop hoisted ## - ## * Kernel is "seq, but args are full blocks ## - ## * Temporary arrays hoisted to driver ## - #################################################### - - - loki_transform_convert( - MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP - CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD REMOVE_OPENMP - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist - OUTPUT - loki-scc-hoist/satur.scc_hoist.F90 - loki-scc-hoist/cloudsc2.scc_hoist.F90 - loki-scc-hoist/cloudsc2tl.scc_hoist.F90 - loki-scc-hoist/cloudsc_driver_tl_loki_mod.scc_hoist.F90 - DEPENDS - satur.F90 cloudsc2.F90 cloudsc2tl.F90 cloudsc_driver_tl_loki_mod.F90 - ) - - - ecbuild_add_executable( TARGET dwarf-cloudsc2-tl-loki-scc-hoist - SOURCES - dwarf_cloudsc.F90 - loki-scc-hoist/cloudsc_driver_tl_loki_mod.scc_hoist.F90 - loki-scc-hoist/cloudsc2tl.scc_hoist.F90 - loki-scc-hoist/cloudsc2.scc_hoist.F90 - loki-scc-hoist/satur.scc_hoist.F90 - error.F90 - LIBS - cloudsc2-common-lib - DEFINITIONS ${CLOUDSC_DEFINITIONS} - ) + #################################################### + ## "Single Column Coalesced" (SCC) mode ## + ## * Removes horizontal vector loops ## + ## * Invokes compute kernel as `!$acc vector` ## + #################################################### + + loki_transform( + COMMAND convert + MODE scc + FRONTEND ${LOKI_FRONTEND} + DIRECTIVE openacc + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 + INCLUDES ${COMMON_INCLUDE} + DEFINITIONS CLOUDSC_GPU_TIMING + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc + OUTPUT + loki-scc/satur.scc.F90 + 
loki-scc/cloudsc2.scc.F90 + loki-scc/cloudsc2tl.scc.F90 + loki-scc/cloudsc_driver_tl_loki_mod.scc.F90 + DEPENDS + satur.F90 + cloudsc2.F90 + cloudsc2tl.F90 + cloudsc_driver_tl_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP + ) + + ecbuild_add_executable( TARGET dwarf-cloudsc2-tl-loki-scc + SOURCES + dwarf_cloudsc.F90 + loki-scc/cloudsc_driver_tl_loki_mod.scc.F90 + loki-scc/cloudsc2tl.scc.F90 + loki-scc/cloudsc2.scc.F90 + loki-scc/satur.scc.F90 + error.F90 + LIBS + cloudsc2-common-lib + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc2-tl--loki-scc-serial + COMMAND bin/dwarf-cloudsc2-tl-loki-scc + ARGS + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=9G + ) + + + #################################################### + ## SCC-hoist mode ## + ## * SCC with vector loop hoisted ## + ## * Kernel is "seq, but args are full blocks ## + ## * Temporary arrays hoisted to driver ## + #################################################### + + loki_transform( + COMMAND convert + MODE scc-hoist + FRONTEND ${LOKI_FRONTEND} + DIRECTIVE openacc + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 + INCLUDES ${COMMON_INCLUDE} + DEFINITIONS CLOUDSC_GPU_TIMING + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist + OUTPUT + loki-scc-hoist/satur.scc_hoist.F90 + loki-scc-hoist/cloudsc2.scc_hoist.F90 + loki-scc-hoist/cloudsc2tl.scc_hoist.F90 + loki-scc-hoist/cloudsc_driver_tl_loki_mod.scc_hoist.F90 + DEPENDS + satur.F90 cloudsc2.F90 cloudsc2tl.F90 cloudsc_driver_tl_loki_mod.F90 + CPP + DATA_OFFLOAD + REMOVE_OPENMP + ) + + ecbuild_add_executable( TARGET dwarf-cloudsc2-tl-loki-scc-hoist + SOURCES + dwarf_cloudsc.F90 + loki-scc-hoist/cloudsc_driver_tl_loki_mod.scc_hoist.F90 + loki-scc-hoist/cloudsc2tl.scc_hoist.F90 + loki-scc-hoist/cloudsc2.scc_hoist.F90 + loki-scc-hoist/satur.scc_hoist.F90 + 
error.F90 + LIBS + cloudsc2-common-lib + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) - ecbuild_add_test( - TARGET dwarf-cloudsc2-tl-loki-scc-hoist-serial - COMMAND bin/dwarf-cloudsc2-tl-loki-scc-hoist - ARGS - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. - OMP 1 - ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=19G - ) + ecbuild_add_test( + TARGET dwarf-cloudsc2-tl-loki-scc-hoist-serial + COMMAND bin/dwarf-cloudsc2-tl-loki-scc-hoist + ARGS + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ENVIRONMENT NV_ACC_CUDA_HEAPSIZE=19G + ) endif() diff --git a/src/cloudsc2_tl_loki/cloudsc_driver_tl_loki_mod.F90 b/src/cloudsc2_tl_loki/cloudsc_driver_tl_loki_mod.F90 index 2d6be3a..263bfca 100644 --- a/src/cloudsc2_tl_loki/cloudsc_driver_tl_loki_mod.F90 +++ b/src/cloudsc2_tl_loki/cloudsc_driver_tl_loki_mod.F90 @@ -259,7 +259,7 @@ SUBROUTINE CLOUDSC_DRIVER_TL( & CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, ZHPM, NGPTOT) - CALL VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, & + CALL VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, NGPBLKS, & & BUFFER_LOC(:,:,1,:) , ZTENO_T5(:,:,:,:), ZTENO_T(:,:,:), & & BUFFER_LOC(:,:,3,:) , ZTENO_Q5(:,:,:,:), ZTENO_Q(:,:,:), & & BUFFER_LOC(:,:,3+NCLDQL,:), ZTENO_L5(:,:,:,:), ZTENO_L(:,:,:), & diff --git a/src/cloudsc2_tl_loki/cloudsc_loki.config b/src/cloudsc2_tl_loki/cloudsc_loki.config index 675d59c..ea14231 100644 --- a/src/cloudsc2_tl_loki/cloudsc_loki.config +++ b/src/cloudsc2_tl_loki/cloudsc_loki.config @@ -15,43 +15,40 @@ disable = ['performance_timer%start', 'performance_timer%end', 'performance_time 'performance_timer%thread_log', 'performance_timer%print_performance', 'validate_taylor_test'] + # Define entry point for call-tree transformation -[[routine]] -name = 'cloudsc_driver_tl' -expand = true -role = 'driver' +[routines] -[[routine]] -name = 'cloudsc2' -role = 'kernel' -expand = true +[routines.cloudsc_driver_tl] + role = 'driver' + expand = true -[[routine]] -name = 'cloudsc2tl' -role = 'kernel' -expand = true 
+[routines.cloudsc2] + role = 'kernel' + expand = true +[routines.cloudsc2tl] + role = 'kernel' + expand = true -[[routine]] -name = 'satur' -role = 'kernel' -expand = true - -############################################## - -[[dimension]] -name = 'horizontal' -size = 'KLON' -index = 'JL' -bounds = ['KIDIA', 'KFDIA'] -aliases = ['NPROMA', 'KDIM%KLON'] - -[[dimension]] -name = 'vertical' -size = 'KLEV' -index = 'JK' - -[[dimension]] -name = 'block_dim' -size = 'NGPBLKS' -index = 'IBL' +[routines.satur] + role = 'kernel' + expand = true + + +# Define indices and bounds for array dimensions +[dimensions] + +[dimensions.horizontal] + size = 'KLON' + index = 'JL' + bounds = ['KIDIA', 'KFDIA'] + aliases = ['NPROMA', 'KDIM%KLON'] + +[dimensions.vertical] + size = 'KLEV' + index = 'JK' + +[dimensions.block_dim] + size = 'NGPBLKS' + index = 'IBL' diff --git a/src/cloudsc2_tl_loki/error.F90 b/src/cloudsc2_tl_loki/error.F90 index 8557338..b3a83e6 100644 --- a/src/cloudsc2_tl_loki/error.F90 +++ b/src/cloudsc2_tl_loki/error.F90 @@ -17,7 +17,7 @@ SUBROUTINE ERROR_NORM(NLON, FIELD, PERT5, PERT, ZNORM, ZCOUNT, ZLAMBDA) END SUBROUTINE ERROR_NORM - SUBROUTINE VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, & + SUBROUTINE VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, NGPBLKS, & & FIELD_T, PERT5_T, PERT_T, & & FIELD_Q, PERT5_Q, PERT_Q, & & FIELD_L, PERT5_L, PERT_L, & @@ -29,7 +29,7 @@ SUBROUTINE VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, & & PFHPSN, PERT5_FHPSN, PERT_FHPSN, & & PCOVPTOT, PERT5_COVPTOT, PERT_COVPTOT & & ) - INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA, NLEV, NLAM, NGPTOT + INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA, NLEV, NLAM, NGPTOT, NGPBLKS REAL(KIND=JPRB), INTENT(IN) :: FIELD_T(NPROMA,NLEV,NGPBLKS), PERT5_T(NPROMA,NLEV,NLAM,NGPBLKS), PERT_T(NPROMA,NLEV,NGPBLKS) REAL(KIND=JPRB), INTENT(IN) :: FIELD_Q(NPROMA,NLEV,NGPBLKS), PERT5_Q(NPROMA,NLEV,NLAM,NGPBLKS), PERT_Q(NPROMA,NLEV,NGPBLKS) REAL(KIND=JPRB), INTENT(IN) :: FIELD_L(NPROMA,NLEV,NGPBLKS), 
PERT5_L(NPROMA,NLEV,NLAM,NGPBLKS), PERT_L(NPROMA,NLEV,NGPBLKS) @@ -41,11 +41,10 @@ SUBROUTINE VALIDATE_TAYLOR_TEST(NPROMA, NLEV, NLAM, NGPTOT, & REAL(KIND=JPRB), INTENT(IN) :: PFHPSN(NPROMA,NLEV+1,NGPBLKS), PERT5_FHPSN(NPROMA,NLEV+1,NLAM,NGPBLKS), PERT_FHPSN(NPROMA,NLEV+1,NGPBLKS) REAL(KIND=JPRB), INTENT(IN) :: PCOVPTOT(NPROMA,NLEV,NGPBLKS), PERT5_COVPTOT(NPROMA,NLEV,NLAM,NGPBLKS), PERT_COVPTOT(NPROMA,NLEV,NGPBLKS) - INTEGER(KIND=JPIM) :: JKGLO, IBL, ICEND, ILAM, NGPBLKS + INTEGER(KIND=JPIM) :: JKGLO, IBL, ICEND, ILAM INTEGER(KIND=JPIM) :: ISTART,ITEST,INEGAT,ITEMPNEGAT REAL(KIND=JPRB) :: ZLAMBDA, ZCOUNT, ZNORM, ZNORMG(10) - NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) DO JKGLO=1,NGPTOT,NPROMA IBL = (JKGLO-1)/NPROMA+1 ICEND = MIN(NPROMA,NGPTOT-JKGLO+1) diff --git a/src/common/module/expand_mod.F90 b/src/common/module/expand_mod.F90 index 236dbe8..c647bf8 100644 --- a/src/common/module/expand_mod.F90 +++ b/src/common/module/expand_mod.F90 @@ -8,7 +8,7 @@ ! nor does it submit to any jurisdiction. ! module expand_mod - USE PARKIND1 , ONLY : JPIM, JPRB, JPRD + USE PARKIND1 , ONLY : JPIM, JPRB USE YOMPHYDER, ONLY : STATE_TYPE use cloudsc_mpi_mod, only : irank, numproc @@ -83,10 +83,10 @@ end subroutine load_and_expand_l1 subroutine load_and_expand_r1(name, field, nlon, nproma, ngptot, nblocks, ngptotg) ! 
Load into the local memory buffer and expand to global field character(len=*) :: name - real(kind=JPRB), allocatable, intent(inout) :: field(:,:) + real(kind=jprb), allocatable, intent(inout) :: field(:,:) integer(kind=jpim), intent(in) :: nlon, nproma, ngptot, nblocks integer(kind=jpim), intent(in), optional :: ngptotg - real(kind=jprd), allocatable :: buffer(:), rbuf(:) + real(kind=jprb), allocatable :: buffer(:), rbuf(:) integer(kind=jpim) :: start, end, size call get_offsets(start, end, size, nlon, 1, 1, ngptot, ngptotg) @@ -100,10 +100,10 @@ end subroutine load_and_expand_r1 subroutine load_and_expand_r2(name, field, nlon, nlev, nproma, ngptot, nblocks, ngptotg) ! Load into the local memory buffer and expand to global field character(len=*) :: name - real(kind=JPRB), allocatable, intent(inout) :: field(:,:,:) + real(kind=jprb), allocatable, intent(inout) :: field(:,:,:) integer(kind=jpim), intent(in) :: nlon, nlev, nproma, ngptot, nblocks integer(kind=jpim), intent(in), optional :: ngptotg - real(kind=jprd), allocatable :: buffer(:,:), rbuf(:,:) + real(kind=jprb), allocatable :: buffer(:,:), rbuf(:,:) integer(kind=jpim) :: start, end, size call get_offsets(start, end, size, nlon, 1, nlev, ngptot, ngptotg) @@ -117,10 +117,10 @@ end subroutine load_and_expand_r2 subroutine load_and_expand_r3(name, field, nlon, nlev, ndim, nproma, ngptot, nblocks, ngptotg) ! 
Load into the local memory buffer and expand to global field character(len=*) :: name - real(kind=JPRB), allocatable, intent(inout) :: field(:,:,:,:) + real(kind=jprb), allocatable, intent(inout) :: field(:,:,:,:) integer(kind=jpim), intent(in) :: nlon, nlev, ndim, nproma, ngptot, nblocks integer(kind=jpim), intent(in), optional :: ngptotg - real(kind=jprd), allocatable :: buffer(:,:,:), rbuf(:,:,:) + real(kind=jprb), allocatable :: buffer(:,:,:), rbuf(:,:,:) integer(kind=jpim) :: start, end, size call get_offsets(start, end, size, nlon, ndim, nlev, ngptot, ngptotg) @@ -135,10 +135,10 @@ subroutine load_and_expand_state(name, state, field, nlon, nlev, ndim, nproma, n ! Load into the local memory buffer and expand to global field character(len=*) :: name type(state_type), allocatable, intent(inout) :: state(:) - real(kind=JPRB), allocatable, target, intent(inout) :: field(:,:,:,:) + real(kind=jprb), allocatable, target, intent(inout) :: field(:,:,:,:) integer(kind=jpim), intent(in) :: nlon, nlev, ndim, nproma, ngptot, nblocks integer(kind=jpim), intent(in), optional :: ngptotg - real(kind=jprd), allocatable :: buffer(:,:,:), rbuf(:,:,:) + real(kind=jprb), allocatable :: buffer(:,:,:), rbuf(:,:,:) integer(kind=jpim) :: start, end, size integer :: b @@ -235,8 +235,8 @@ subroutine expand_i1(buffer, field, nlon, nproma, ngptot, nblocks) end subroutine expand_i1 subroutine expand_r1(buffer, field, nlon, nproma, ngptot, nblocks) - real(kind=JPRD), intent(inout) :: buffer(nlon) - real(kind=JPRB), intent(inout) :: field(nproma, nblocks) + real(kind=jprb), intent(inout) :: buffer(nlon) + real(kind=jprb), intent(inout) :: field(nproma, nblocks) integer(kind=jpim), intent(in) :: nlon, nproma, ngptot, nblocks integer :: b, gidx, bsize, fidx, fend, bidx, bend @@ -269,8 +269,8 @@ end subroutine expand_r1 subroutine expand_r2(buffer, field, nlon, nproma, nlev, ngptot, nblocks) use omp_lib - real(kind=JPRD), intent(inout) :: buffer(nlon, nlev) - real(kind=JPRB), intent(inout) :: 
field(nproma, nlev, nblocks) + real(kind=jprb), intent(inout) :: buffer(nlon, nlev) + real(kind=jprb), intent(inout) :: field(nproma, nlev, nblocks) integer(kind=jpim), intent(in) :: nlon, nlev, nproma, ngptot, nblocks integer :: b, gidx, bsize, fidx, fend, bidx, bend @@ -302,8 +302,8 @@ subroutine expand_r2(buffer, field, nlon, nproma, nlev, ngptot, nblocks) end subroutine expand_r2 subroutine expand_r3(buffer, field, nlon, nproma, nlev, ndim, ngptot, nblocks) - real(kind=JPRD), intent(inout) :: buffer(nlon, nlev, ndim) - real(kind=JPRB), intent(inout) :: field(nproma, nlev, ndim, nblocks) + real(kind=jprb), intent(inout) :: buffer(nlon, nlev, ndim) + real(kind=jprb), intent(inout) :: field(nproma, nlev, ndim, nblocks) integer(kind=jpim), intent(in) :: nlon, nlev, ndim, nproma, ngptot, nblocks integer :: b, gidx, bsize, fidx, fend, bidx, bend