Skip to content

Commit

Permalink
Merge branch 'release-v2.1.0-rc16'
Browse files Browse the repository at this point in the history
  • Loading branch information
pseewald committed Sep 5, 2020
2 parents 28f8e75 + de079ea commit 6c52382
Show file tree
Hide file tree
Showing 44 changed files with 1,226 additions and 438 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MAJOR = 2
MINOR = 1
PATCH = 0-rc15
PATCH = 0-rc16
# A specific DATE (YYYY-MM-DD) fixes an official release; otherwise
# the version is considered a development version.
DATE =
27 changes: 15 additions & 12 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,24 +106,27 @@ add_fypp_sources(DBCSR_SRCS
work/dbcsr_work_operations.F
)

set(DBCSR_ACC_SRCS
acc/acc_dev.cpp
acc/acc_error.cpp
acc/acc_event.cpp
acc/acc_init.cpp
acc/acc_mem.cpp
acc/acc_stream.cpp
)

set(DBCSR_CUDA_SRCS
acc/cuda/acc_cuda.cpp
acc/cuda/dbcsr_cuda_nvtx_cu.cu
acc/cublaswrap/cublas.cu
acc/cuda/acc_dev.cpp
acc/cuda/acc_error.cpp
acc/cuda/acc_event.cpp
acc/cuda/acc_init.cpp
acc/cuda/acc_mem.cpp
acc/cuda/acc_stream.cpp
)

set(DBCSR_HIP_SRCS
acc/hip/acc_hip.cpp
acc/hipblaswrap/hipblas.cpp
acc/cuda/acc_dev.cpp
acc/cuda/acc_error.cpp
acc/cuda/acc_event.cpp
acc/cuda/acc_init.cpp
acc/cuda/acc_mem.cpp
acc/cuda/acc_stream.cpp
)

# set the __SHORT_FILE__ per file for dbcsr sources
Expand Down Expand Up @@ -193,7 +196,7 @@ if (MPI_FOUND)
endif ()

# =================================================================================================
# DBCSR's OpenMP/offload backend
# Link OpenMP runtime library even if DBCSR main code is not built with OpenMP

if (OpenMP_FOUND)
target_link_libraries(dbcsr PRIVATE OpenMP::OpenMP_Fortran)
Expand Down Expand Up @@ -243,7 +246,7 @@ if (USE_CUDA)
target_link_libraries(dbcsr PRIVATE nvrtc)

# Complete list of GPU-support sources
set(DBCSR_ACC_SRCS ${DBCSR_ACC_SRCS} ${DBCSR_CUDA_SRCS})
set(DBCSR_ACC_SRCS ${DBCSR_CUDA_SRCS})

# Make an object library
add_library(acc OBJECT ${DBCSR_ACC_SRCS})
Expand Down Expand Up @@ -284,7 +287,7 @@ if (USE_HIP)
target_link_libraries(dbcsr PUBLIC ${ROCM_HIPRTC_LIB})

# Complete list of GPU-support sources
set(DBCSR_ACC_SRCS ${DBCSR_ACC_SRCS} ${DBCSR_HIP_SRCS})
set(DBCSR_ACC_SRCS ${DBCSR_HIP_SRCS})

# Compile the rest of the HIP source files into a static library
set_source_files_properties(${DBCSR_ACC_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
Expand Down
4 changes: 2 additions & 2 deletions src/acc/PACKAGE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description": "Generic Accelerator API",
"description": "Generic accelerator API",
"archive": "libdbcsr",
"requires": ["../base", "include", "cuda", "hip", "libsmm_acc"]
"requires": ["../base", "cuda", "hip", "libsmm_acc"]
}
8 changes: 4 additions & 4 deletions src/acc/include/acc.h → src/acc/acc.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#include <stddef.h>

#ifdef __cplusplus
#if defined(__cplusplus)
extern "C" {
#endif

Expand All @@ -28,10 +28,10 @@ typedef enum acc_data_t {
ACC_DATA_UNKNOWN = -1
} acc_data_t;

/** accelerator driver initialization and finalization */
/** initialization and finalization */
int acc_init(void);
int acc_finalize(void);
int acc_clear_errors(void);
void acc_clear_errors(void);

/** devices */
int acc_get_ndevices(int* n_devices);
Expand Down Expand Up @@ -63,7 +63,7 @@ int acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t count, acc_s
int acc_memset_zero(void* dev_mem, size_t offset, size_t length, acc_stream_t* stream);
int acc_dev_mem_info(size_t* mem_free, size_t* mem_total);

#ifdef __cplusplus
#if defined(__cplusplus)
}
#endif

Expand Down
6 changes: 3 additions & 3 deletions src/acc/include/acc_libsmm.h → src/acc/acc_libsmm.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#include "acc.h"

#ifdef __cplusplus
#if defined(__cplusplus)
extern "C" {
#endif

Expand All @@ -21,7 +21,7 @@ typedef struct libsmm_acc_stack_descriptor_type {
} libsmm_acc_stack_descriptor_type;

int libsmm_acc_init(void);
int libsmm_acc_is_thread_safe(void);
acc_bool_t libsmm_acc_is_thread_safe(void);

int libsmm_acc_transpose(const int* dev_trs_stack, int offset, int nblks,
void* dev_data, acc_data_t datatype, int m, int n, acc_stream_t* stream);
Expand All @@ -30,7 +30,7 @@ int libsmm_acc_process(const libsmm_acc_stack_descriptor_type* dev_param_stack,
int nparams, acc_data_t datatype, const void* dev_a_data, const void* dev_b_data, void* dev_c_data,
int m_max, int n_max, int k_max, acc_bool_t def_mnk, acc_stream_t* stream);

#ifdef __cplusplus
#if defined(__cplusplus)
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion src/acc/cublaswrap/cublas.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include <stdio.h>
#include "cublas_v2.h"
#include "../acc_error.h"
#include "../cuda/acc_error.h"


/****************************************************************************/
Expand Down
4 changes: 2 additions & 2 deletions src/acc/cuda/PACKAGE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description": "Cuda backend for accelerator api",
"description": "Cuda backend for accelerator API",
"archive":"libdbcsr",
"requires": ["../../base", "../include"]
"requires": ["../../base", ".."]
}
15 changes: 8 additions & 7 deletions src/acc/acc_dev.cpp → src/acc/cuda/acc_dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
* SPDX-License-Identifier: GPL-2.0+ *
*------------------------------------------------------------------------------------------------*/

#ifdef __CUDA
#include "cuda/acc_cuda.h"
#else
#include "hip/acc_hip.h"
#if defined(__CUDA)
# include "acc_cuda.h"
#elif defined(__HIP)
# include "../hip/acc_hip.h"
#endif

#include "acc_error.h"
#include "../acc.h"

#include <stdio.h>
#include <math.h>
#include "acc_error.h"
#include "include/acc.h"

// for debug purpose
static const int verbose_print = 1;
Expand All @@ -42,7 +43,7 @@ extern "C" int acc_set_active_device(int device_id){
// establish context
ACC_API_CALL(Free, (0));

#ifdef __HIP_PLATFORM_NVCC__
#if defined(__HIP_PLATFORM_NVCC__)
if (verbose_print){
ACC_API_CALL(DeviceSetLimit, (ACC(LimitPrintfFifoSize), (size_t) 1000000000));
}
Expand Down
11 changes: 6 additions & 5 deletions src/acc/acc_error.cpp → src/acc/cuda/acc_error.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@
* SPDX-License-Identifier: GPL-2.0+ *
*------------------------------------------------------------------------------------------------*/

#ifdef __CUDA
#include "cuda/acc_cuda.h"
#else
#include "hip/acc_hip.h"
#if defined(__CUDA)
# include "acc_cuda.h"
#elif defined(__HIP)
# include "../hip/acc_hip.h"
#endif

#include "acc_error.h"

#include <stdio.h>
#include <math.h>
#include "acc_error.h"

/****************************************************************************/
int acc_error_check (ACC(Error_t) error){
Expand Down
8 changes: 4 additions & 4 deletions src/acc/acc_error.h → src/acc/cuda/acc_error.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
* SPDX-License-Identifier: GPL-2.0+ *
*------------------------------------------------------------------------------------------------*/

#ifdef __CUDA
#include "cuda/acc_cuda.h"
#else
#include "hip/acc_hip.h"
#if defined(__CUDA)
# include "acc_cuda.h"
#elif defined(__HIP)
# include "../hip/acc_hip.h"
#endif

int acc_error_check (ACC(Error_t) acc_error);
13 changes: 7 additions & 6 deletions src/acc/acc_event.cpp → src/acc/cuda/acc_event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
* SPDX-License-Identifier: GPL-2.0+ *
*------------------------------------------------------------------------------------------------*/

#ifdef __CUDA
#include "cuda/acc_cuda.h"
#else
#include "hip/acc_hip.h"
#if defined(__CUDA)
# include "acc_cuda.h"
#elif defined(__HIP)
# include "../hip/acc_hip.h"
#endif

#include "acc_error.h"
#include "../acc.h"

#include <stdio.h>
#include <math.h>
#include "acc_error.h"
#include "include/acc.h"

static const int verbose_print = 0;

Expand Down
17 changes: 9 additions & 8 deletions src/acc/acc_init.cpp → src/acc/cuda/acc_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,19 @@
* SPDX-License-Identifier: GPL-2.0+ *
*------------------------------------------------------------------------------------------------*/

#ifdef __CUDA
#include "cuda/acc_cuda.h"
#else
#include "hip/acc_hip.h"
#if defined(__CUDA)
# include "acc_cuda.h"
#elif defined(__HIP)
# include "../hip/acc_hip.h"
#endif

#include "../acc.h"
#include "../acc_libsmm.h"

#include <stdio.h>
#include "include/acc.h"
#include "include/acc_libsmm.h"

#ifdef __CUDA_PROFILING
#include <nvToolsExtCudaRt.h>
#if defined(__CUDA_PROFILING)
# include <nvToolsExtCudaRt.h>
#endif

/****************************************************************************/
Expand Down
13 changes: 7 additions & 6 deletions src/acc/acc_mem.cpp → src/acc/cuda/acc_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
* SPDX-License-Identifier: GPL-2.0+ *
*------------------------------------------------------------------------------------------------*/

#ifdef __CUDA
#include "cuda/acc_cuda.h"
#else
#include "hip/acc_hip.h"
#if defined(__CUDA)
# include "acc_cuda.h"
#elif defined(__HIP)
# include "../hip/acc_hip.h"
#endif

#include "acc_error.h"
#include "../acc.h"

#include <stdio.h>
#include <math.h>
#include "acc_error.h"
#include "include/acc.h"

static const int verbose_print = 0;

Expand Down
19 changes: 10 additions & 9 deletions src/acc/acc_stream.cpp → src/acc/cuda/acc_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,20 @@
* SPDX-License-Identifier: GPL-2.0+ *
*------------------------------------------------------------------------------------------------*/

#ifdef __CUDA
#include "cuda/acc_cuda.h"
#else
#include "hip/acc_hip.h"
#if defined(__CUDA)
# include "acc_cuda.h"
#elif defined(__HIP)
# include "../hip/acc_hip.h"
#endif

#include "acc_error.h"
#include "../acc.h"

#include <stdio.h>
#include <math.h>
#include "acc_error.h"
#include "include/acc.h"

#ifdef __CUDA_PROFILING
#include <nvToolsExtCudaRt.h>
#if defined(__CUDA_PROFILING)
# include <nvToolsExtCudaRt.h>
#endif

static const int verbose_print = 0;
Expand Down Expand Up @@ -53,7 +54,7 @@ extern "C" int acc_stream_create(void** stream_p, const char* name, int priority
if (acc_error_check(cErr)) return -1;
if (acc_error_check(ACC(GetLastError)())) return -1;

#ifdef __CUDA_PROFILING
#if defined(__CUDA_PROFILING)
nvtxNameCudaStreamA(*acc_stream, name);
#endif

Expand Down
4 changes: 2 additions & 2 deletions src/acc/hip/PACKAGE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description": "HIP backend for accelerator api",
"description": "HIP backend for accelerator API",
"archive":"libdbcsr",
"requires": ["../../base", "../include"]
"requires": ["../../base", "..", "../cuda"]
}
2 changes: 1 addition & 1 deletion src/acc/hipblaswrap/hipblas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "hipblas_v2.h"
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#include "../acc_error.h"
#include "../cuda/acc_error.h"


/****************************************************************************/
Expand Down
5 changes: 0 additions & 5 deletions src/acc/include/PACKAGE

This file was deleted.

2 changes: 1 addition & 1 deletion src/acc/libsmm_acc/PACKAGE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description": "Generic GPU-accelerated library for small matrix multiplications",
"archive": "libdbcsr",
"requires": ["../include", "../cuda", "../hip"]
"requires": ["..", "../cuda", "../hip"]
}
10 changes: 4 additions & 6 deletions src/acc/libsmm_acc/kernels/smm_acc_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
* SPDX-License-Identifier: GPL-2.0+ *
*------------------------------------------------------------------------------------------------*/

#ifdef __HIP
#if not defined(__HIP_PLATFORM_NVCC__)
#include <hip/hip_runtime.h>
#endif
#if defined(__HIP) && !defined(__HIP_PLATFORM_NVCC__)
# include <hip/hip_runtime.h>
#endif

#define MAX(x, y) (((x) > (y)) ? (x) : (y))
Expand All @@ -23,7 +21,7 @@
* There is no native support for atomicAdd on doubles in Cuda 5.0. However *
* the following implementation is provided in the CUDA C Programming Guide. *
******************************************************************************/
#ifdef __CUDA
#if defined(__CUDA)
#if (__CUDACC_VER_MAJOR__<8) || ( defined(__CUDA_ARCH__) && (__CUDA_ARCH__<600) )
static __device__ double atomicAdd(double *address, double val) {
unsigned long long int *address_as_ull =
Expand All @@ -43,7 +41,7 @@ static __device__ double atomicAdd(double *address, double val) {
/******************************************************************************
* A simple __ldg replacement for older cuda devices. *
******************************************************************************/
#ifdef __CUDA
#if defined(__CUDA)
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 350)
#define __ldg(x) (*(x))
#endif
Expand Down
Loading

0 comments on commit 6c52382

Please sign in to comment.