diff --git a/include/cutlass/detail/helper_macros.hpp b/include/cutlass/detail/helper_macros.hpp
index 8cdb1ab102..d180d6dc9d 100644
--- a/include/cutlass/detail/helper_macros.hpp
+++ b/include/cutlass/detail/helper_macros.hpp
@@ -61,7 +61,7 @@
 #endif
 
 #if defined(CUTLASS_ENABLE_SYCL)
-#define CUTLASS_HOST inline
+#define CUTLASS_HOST  // expands to nothing in this branch: it no longer implies `inline`, so definitions that need it must say so explicitly
 #define CUTLASS_GLOBAL
 #define CUTLASS_SHARED
 #else
diff --git a/include/cutlass/gpu_generics.h b/include/cutlass/gpu_generics.h
index e2d8d6c06c..c74dda2366 100644
--- a/include/cutlass/gpu_generics.h
+++ b/include/cutlass/gpu_generics.h
@@ -329,7 +329,7 @@ CUTLASS_HOST_DEVICE
 void cuGetErrorString(cudaError_t error, const char **) {
 }
 
-CUTLASS_HOST
+inline CUTLASS_HOST  // explicit `inline` for this header-defined stub, which always reports cudaSuccess
 cudaError_t cudaGetLastError() {
   return cudaSuccess;
 }
@@ -382,7 +382,7 @@ CUresult cuMemsetD8Async(CUdeviceptr devPtr, uint8_t value, size_t count, cudaSt
 using cudaFuncAttribute = unsigned int;
 constexpr cudaFuncAttribute cudaFuncAttributeMaxDynamicSharedMemorySize = 0;
 
-CUTLASS_HOST
+inline CUTLASS_HOST  // explicit `inline` for this header-defined stub, which ignores its arguments and returns cudaSuccess
 cudaError_t cudaFuncSetAttribute(const void *func, cudaFuncAttribute attr, int value) {
   return cudaSuccess;
 }
@@ -397,7 +397,7 @@ cudaError_t cudaDeviceGetAttribute(int *value, cudaDeviceAttr attr, int device)
 
 constexpr unsigned int cudaOccupancyDisableCachingOverride = 0;
 
-CUTLASS_HOST
+inline CUTLASS_HOST
 cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
         int *numBlocks, const void *func, int blockSize, size_t dynamicSMemSize, unsigned int flags) {
   return cudaSuccess;