Skip to content

Commit

Permalink
Merge branch 'release-v2.0.0.rc2'
Browse files Browse the repository at this point in the history
  • Loading branch information
alazzaro committed Jul 11, 2019
2 parents 4547328 + ec2d7d5 commit e3002eb
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 58 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MAJOR = 2
MINOR = 0
PATCH = 0-rc1
PATCH = 0-rc2
# A specific DATE (YYYY-MM-DD) fixes an official release; otherwise
# it is considered a development version.
DATE = 2019-07-08
84 changes: 34 additions & 50 deletions src/acc/libsmm_acc/libcusmm/libcusmm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
#include <array>
#include <iostream>

#if defined _OPENMP
#include <omp.h>
#endif

#define dbcsr_type_real_4 1
#define dbcsr_type_real_8 3
#define dbcsr_type_complex_4 5
Expand Down Expand Up @@ -201,58 +205,29 @@ int libcusmm_process_d(int *param_stack, int stack_size, CUstream stream, int m,
CUfunction kern_func = NULL;
int threads, grouping;
Triplet h_mnk = { m, n, k };
static bool cpu_fallback = false;
std::unordered_map<std::array<int, 3>, kernel_launcher>::iterator kernel_it;

// Look up the kernel in the table of already JITed kernels
if (kernel_handles.find(h_mnk) == kernel_handles.end()){ // the kernel has not been JIT-ed yet

static bool cpu_fallback = false;

// Add lock to table of locks and initialize it
// (Some serialization here)
#if defined _OPENMP
#pragma omp critical
{
if(kernel_locks.find(h_mnk) == kernel_locks.end()){
omp_lock_t lock_kernel_mnk;
kernel_locks.emplace(h_mnk, lock_kernel_mnk);
omp_init_lock(&kernel_locks.at(h_mnk));
}
}

// Set lock if it exists
if(kernel_locks.find(h_mnk) != kernel_locks.end()){
omp_set_lock(&kernel_locks.at(h_mnk));
}

// JIT the kernel using a single thread
if(kernel_handles.find(h_mnk) == kernel_handles.end()){
#endif
add_kernel_handle_to_jitted_kernels(kern_func, stream, h_mnk, threads, grouping, cpu_fallback);
#if defined _OPENMP
}

// Unset lock and destroy
if(kernel_locks.find(h_mnk) != kernel_locks.end()){
omp_unset_lock(&kernel_locks.at(h_mnk));
}

#pragma omp critical
{
if(kernel_locks.find(h_mnk) == kernel_locks.end()){
omp_destroy_lock(&kernel_locks.at(h_mnk));
auto it = kernel_locks.find(h_mnk);
kernel_locks.erase(it);
}
}
#pragma omp critical (jit_multiplication)
{
#endif

if(cpu_fallback)
return -2; // fall back to CPU
// Look up the kernel in the table of already JITed kernels
kernel_it = kernel_handles.find(h_mnk);
if (kernel_it == kernel_handles.end()){ // the kernel has not been JIT-ed yet

add_kernel_handle_to_jitted_kernels(kern_func, stream, h_mnk, threads, grouping, cpu_fallback);
kernel_it = kernel_handles.find(h_mnk);

} // now the kernel has been jitted

// Look up the kernel in the table of already JITed kernels
auto kernel_it = kernel_handles.find(h_mnk);
#if defined _OPENMP
}
#endif

if(cpu_fallback)
return -2; // fall back to CPU

// Retrieve kernel launching parameters
kern_func = kernel_it->second.kernel_function;
Expand Down Expand Up @@ -330,20 +305,29 @@ int libcusmm_transpose_d(int *trs_stack, int offset, int nblks,

// Look up the kernel in the table of already JITed kernels
Triplet h_mnk = { m, n, 0 };
auto kernel_it = transpose_handles.find(h_mnk);
if(kernel_it != transpose_handles.end()){ // the kernel has already been JITed
std::unordered_map<std::array<int, 3>, CUfunction>::iterator kernel_it;

kern_func = kernel_it->second; // retrieve handle
#if defined _OPENMP
#pragma omp critical (jit_transpose)
{
#endif

} else { // the kernel has not been JIT-ed yet
kernel_it = transpose_handles.find(h_mnk);
if(kernel_it == transpose_handles.end()){ // the kernel has not been JIT-ed yet

// JIT and store a kernel for this transposition
jit_transpose_handle(kern_func, m, n);
transpose_handles.emplace(h_mnk, kern_func);
kernel_it = transpose_handles.find(h_mnk);

}

// Construct argument pointer list and lauch function
#if defined _OPENMP
}
#endif

// Construct argument pointer list and launch function
kern_func = kernel_it->second; // retrieve handle
int* trs_stack_ = trs_stack + offset;
void *args[] = { &trs_stack_, &buffer};

Expand Down
7 changes: 0 additions & 7 deletions src/acc/libsmm_acc/libcusmm/libcusmm.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@
#include <unordered_map>
#include <vector>

#if defined _OPENMP
#include <omp.h>
#endif

// Macros for CUDA error handling
// Wrap calls to CUDA NVRTC API
#define NVRTC_SAFE_CALL(name, x) \
Expand Down Expand Up @@ -51,9 +47,6 @@ struct kernel_launcher {
};

static std::unordered_map<Triplet, kernel_launcher> kernel_handles;
#if defined _OPENMP
static std::unordered_map<Triplet, omp_lock_t> kernel_locks;
#endif

int libcusmm_process_d(int *param_stack, int stack_size,
CUstream stream, int m, int n, int k,
Expand Down

0 comments on commit e3002eb

Please sign in to comment.