Skip to content

Commit

Permalink
add __attribute__((always_inline)) to CUTLASS_GLOBAL
Browse files Browse the repository at this point in the history
  • Loading branch information
AD2605 committed Oct 22, 2024
1 parent 70264d4 commit 804061b
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 4 deletions.
2 changes: 1 addition & 1 deletion include/cutlass/detail/helper_macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@

#if defined(CUTLASS_ENABLE_SYCL)
#define CUTLASS_HOST
-#define CUTLASS_GLOBAL
+#define CUTLASS_GLOBAL __attribute__((always_inline)) inline
#define CUTLASS_SHARED
#else
#define CUTLASS_HOST __host__
Expand Down
5 changes: 2 additions & 3 deletions include/cutlass/device_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,10 @@ void Kernel2(typename Operator::Params params) {

/// Generic CUTLASS kernel template.
template <typename Operator>
+CUTLASS_GLOBAL
#if defined(CUTLASS_ENABLE_SYCL)
-__attribute__((always_inline)) inline void
-device_kernel(typename Operator::Params const& params, sycl::local_ptr<char> smem) {
+void device_kernel(typename Operator::Params const& params, sycl::local_ptr<char> smem) {
#else
-CUTLASS_GLOBAL
#ifdef __CUDACC__
// Enclosing this in __CUDACC__ suppresses MSVC warnings.
__launch_bounds__(Operator::MaxThreadsPerBlock, Operator::MinBlocksPerMultiprocessor)
Expand Down

0 comments on commit 804061b

Please sign in to comment.