diff --git a/cmake/options.cmake b/cmake/options.cmake index cd1c8be3c56..0520f931b7e 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -146,6 +146,7 @@ set(DNNL_ENABLE_PRIMITIVE_GPU_ISA "ALL" CACHE STRING at build time. Regardless of value chosen, reference OpenCL-based implementations will always be available. Valid values: - ALL (the default). Includes all ISA to be enabled. + - NONE. Includes no ISAs, just the generic kernels. - ;;... Includes only selected ISA to be enabled. Possible values are: GEN9, GEN11, XELP, XEHP, XEHPG, XEHPC, XE2.") @@ -281,13 +282,13 @@ endif() set(DNNL_GPU_VENDOR "NONE" CACHE STRING "When DNNL_GPU_RUNTIME is not NONE DNNL_GPU_VENDOR specifies target GPU - vendor for GPU engines. Can be INTEL (default), NVIDIA or AMD.") + vendor for GPU engines. Can be INTEL (default), GENERIC, NVIDIA or AMD.") if(NOT DNNL_GPU_RUNTIME STREQUAL "NONE" AND DNNL_GPU_VENDOR STREQUAL "NONE") set(DNNL_GPU_VENDOR "INTEL") endif() -if(NOT "${DNNL_GPU_VENDOR}" MATCHES "^(NONE|INTEL|NVIDIA|AMD)$") +if(NOT "${DNNL_GPU_VENDOR}" MATCHES "^(NONE|GENERIC|INTEL|NVIDIA|AMD)$") message(FATAL_ERROR "Unsupported GPU vendor: ${DNNL_GPU_VENDOR}") endif() @@ -327,6 +328,12 @@ else() set(DNNL_WITH_SYCL false) endif() +if(DNNL_GPU_RUNTIME STREQUAL "OCL") # ... OR DNNL_CPU_RUNTIME STREQUAL "OCL") + set(DNNL_WITH_OCL true) +else() + set(DNNL_WITH_OCL false) +endif() + # ============= # Miscellaneous # ============= diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d286751d2ee..5c50e64d20b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -125,6 +125,10 @@ endif() add_subdirectory(common) +if(DNNL_WITH_SYCL OR DNNL_GPU_RUNTIME STREQUAL "OCL") + add_subdirectory(xpu) +endif() + if(NOT DNNL_CPU_RUNTIME STREQUAL "NONE") add_subdirectory(cpu) endif() @@ -133,10 +137,6 @@ if(NOT DNNL_GPU_RUNTIME STREQUAL "NONE") add_subdirectory(gpu) endif() -if(DNNL_WITH_SYCL OR DNNL_GPU_RUNTIME STREQUAL "OCL") - add_subdirectory(xpu) -endif() - if(DNNL_WITH_SYCL) # Enable linking SYCL kernels. if(DNNL_SYCL_CUDA) diff --git a/src/gpu/CMakeLists.txt b/src/gpu/CMakeLists.txt index f49a9bd2d63..17dd337a46c 100644 --- a/src/gpu/CMakeLists.txt +++ b/src/gpu/CMakeLists.txt @@ -26,12 +26,14 @@ set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS add_subdirectory(generic) -if(DNNL_GPU_VENDOR STREQUAL "INTEL") - add_definitions_with_host_compiler(-DNGEN_CPP11) - add_definitions_with_host_compiler(-DNGEN_SAFE) - add_definitions_with_host_compiler(-DNGEN_NEO_INTERFACE) - add_definitions_with_host_compiler(-DNGEN_NO_OP_NAMES) - add_definitions_with_host_compiler(-DNGEN_WINDOWS_COMPAT) +if(DNNL_WITH_OCL) + if(DNNL_GPU_VENDOR STREQUAL "INTEL") + add_definitions_with_host_compiler(-DNGEN_CPP11) + add_definitions_with_host_compiler(-DNGEN_SAFE) + add_definitions_with_host_compiler(-DNGEN_NEO_INTERFACE) + add_definitions_with_host_compiler(-DNGEN_NO_OP_NAMES) + add_definitions_with_host_compiler(-DNGEN_WINDOWS_COMPAT) + endif() add_subdirectory(intel) endif() diff --git a/src/gpu/gpu_batch_normalization_list.cpp b/src/gpu/gpu_batch_normalization_list.cpp index e37b4edd65a..6cc312bbd2f 100644 --- a/src/gpu/gpu_batch_normalization_list.cpp +++ b/src/gpu/gpu_batch_normalization_list.cpp @@ -16,13 +16,14 @@ #include "gpu/gpu_impl_list.hpp" -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL -#include "gpu/intel/ocl/bnorm/gen9_batch_normalization.hpp" -#include "gpu/intel/ocl/bnorm/nhwc_batch_normalization.hpp" #include "gpu/intel/ocl/bnorm/ref_batch_normalization.hpp" #include "gpu/intel/ocl/bnorm/reusable_bnorm.hpp" #include "gpu/intel/ocl/bnorm/simple_bnorm.hpp" +#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL +#include "gpu/intel/ocl/bnorm/gen9_batch_normalization.hpp" +#include "gpu/intel/ocl/bnorm/nhwc_batch_normalization.hpp" + #ifdef DNNL_DEV_MODE #include "gpu/intel/ocl/bnorm/nhwc_reusable.hpp" #endif @@ -52,9 +53,9 @@ const std::map> GPU_INSTANCE_INTEL_DEVMODE(intel::ocl::nhwc_reusable_batch_normalization_fwd_t) GPU_INSTANCE_INTEL(intel::ocl::nhwc_batch_normalization_fwd_t) GPU_INSTANCE_INTEL(intel::ocl::gen9_batch_normalization_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::simple_batch_normalization_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::reusable_batch_normalization_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_batch_normalization_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::simple_batch_normalization_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::reusable_batch_normalization_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_batch_normalization_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_batch_normalization_fwd_t) GPU_INSTANCE_AMD(amd::miopen_batch_normalization_fwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_batch_normalization_fwd_t) @@ -64,9 +65,9 @@ const std::map> GPU_INSTANCE_INTEL_DEVMODE(intel::ocl::nhwc_reusable_batch_normalization_bwd_t) GPU_INSTANCE_INTEL(intel::ocl::nhwc_batch_normalization_bwd_t) GPU_INSTANCE_INTEL(intel::ocl::gen9_batch_normalization_bwd_t) - GPU_INSTANCE_INTEL(intel::ocl::simple_batch_normalization_bwd_t) - GPU_INSTANCE_INTEL(intel::ocl::reusable_batch_normalization_bwd_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_batch_normalization_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::simple_batch_normalization_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::reusable_batch_normalization_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_batch_normalization_bwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_batch_normalization_bwd_t) GPU_INSTANCE_AMD(amd::miopen_batch_normalization_bwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_batch_normalization_bwd_t) diff --git a/src/gpu/gpu_binary_list.cpp b/src/gpu/gpu_binary_list.cpp index 965b9fc51fe..1ce42013c40 100644 --- a/src/gpu/gpu_binary_list.cpp +++ b/src/gpu/gpu_binary_list.cpp @@ -15,11 +15,11 @@ *******************************************************************************/ #include "gpu/gpu_impl_list.hpp" +#include "gpu/intel/ocl/multi_po_reorder_binary.hpp" +#include "gpu/intel/ocl/simple_binary.hpp" #if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/gen9_binary.hpp" -#include "gpu/intel/ocl/multi_po_reorder_binary.hpp" -#include "gpu/intel/ocl/simple_binary.hpp" #endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA @@ -39,9 +39,9 @@ namespace { // clang-format off constexpr impl_list_item_t impl_list[] = REG_BINARY_P({ - GPU_INSTANCE_INTEL(intel::ocl::multi_po_reorder_binary) + GPU_INSTANCE_GENERIC(intel::ocl::multi_po_reorder_binary) GPU_INSTANCE_INTEL(intel::ocl::gen9_binary_t) - GPU_INSTANCE_INTEL(intel::ocl::simple_binary_t) + GPU_INSTANCE_GENERIC(intel::ocl::simple_binary_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_binary_t) GPU_INSTANCE_AMD(amd::miopen_binary_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_binary_t) diff --git a/src/gpu/gpu_concat_list.cpp b/src/gpu/gpu_concat_list.cpp index 24105ee7b7f..110229743a8 100644 --- a/src/gpu/gpu_concat_list.cpp +++ b/src/gpu/gpu_concat_list.cpp @@ -17,12 +17,12 @@ #include "gpu/gpu_impl_list.hpp" #include "gpu/generic/ref_concat.hpp" - -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL -#include "gpu/intel/ocl/gen9_concat.hpp" #include "gpu/intel/ocl/multi_concat.hpp" #include "gpu/intel/ocl/reusable_simple_concat.hpp" #include "gpu/intel/ocl/simple_concat.hpp" + +#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL +#include "gpu/intel/ocl/gen9_concat.hpp" #endif namespace dnnl { @@ -33,10 +33,10 @@ namespace { // clang-format off constexpr impl_list_item_t impl_list[] = REG_CONCAT_P({ - GPU_CONCAT_INSTANCE_INTEL(intel::ocl::reusable_simple_concat_t) - GPU_CONCAT_INSTANCE_INTEL(intel::ocl::simple_concat_t) + GPU_CONCAT_INSTANCE_GENERIC(intel::ocl::reusable_simple_concat_t) + GPU_CONCAT_INSTANCE_GENERIC(intel::ocl::simple_concat_t) GPU_CONCAT_INSTANCE_INTEL(intel::ocl::gen9_concat_t) - GPU_CONCAT_INSTANCE_INTEL(intel::ocl::multi_concat_t) + GPU_CONCAT_INSTANCE_GENERIC(intel::ocl::multi_concat_t) GPU_CONCAT_INSTANCE_GENERIC(generic::ref_concat_t) nullptr, }); diff --git a/src/gpu/gpu_convolution_list.cpp b/src/gpu/gpu_convolution_list.cpp index 88d5969683f..77d3d082701 100644 --- a/src/gpu/gpu_convolution_list.cpp +++ b/src/gpu/gpu_convolution_list.cpp @@ -15,12 +15,12 @@ *******************************************************************************/ #include "gpu/gpu_impl_list.hpp" +#include "gpu/intel/ocl/ref_convolution.hpp" #if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/jit/binary_format.hpp" #include "gpu/intel/jit/conv/gen_convolution.hpp" #include "gpu/intel/ocl/gen9_wino_convolution.hpp" -#include "gpu/intel/ocl/ref_convolution.hpp" #ifdef DNNL_DEV_MODE #include "gpu/intel/jit/v2/conv/gen_convolution.hpp" @@ -50,7 +50,7 @@ const std::map> GPU_INSTANCE_INTEL_DEVMODE(intel::jit::v2::conv::gen_convolution_fwd_t) GPU_INSTANCE_INTEL(intel::jit::gen_convolution_fwd_t) GPU_INSTANCE_INTEL(intel::ocl::gen9_wino_convolution_fwd_t) - GPU_INSTANCE_INTEL_REF(intel::ocl::ref_convolution_fwd_t) + GPU_INSTANCE_REF(intel::ocl::ref_convolution_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_convolution_fwd_t) GPU_INSTANCE_AMD(amd::miopen_convolution_fwd_t) nullptr, @@ -58,7 +58,7 @@ const std::map> {{backward_data}, REG_BWD_D_PK({ GPU_INSTANCE_INTEL_DEVMODE(intel::jit::v2::conv::gen_convolution_bwd_data_t) GPU_INSTANCE_INTEL(intel::jit::gen_convolution_bwd_data_t) - GPU_INSTANCE_INTEL_REF(intel::ocl::ref_convolution_bwd_data_t) + GPU_INSTANCE_REF(intel::ocl::ref_convolution_bwd_data_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_convolution_bwd_data_t) GPU_INSTANCE_AMD(amd::miopen_convolution_bwd_data_t) nullptr, @@ -66,7 +66,7 @@ const std::map> {{backward_weights}, REG_BWD_PK({ GPU_INSTANCE_INTEL_DEVMODE(intel::jit::v2::conv::gen_convolution_bwd_weights_t) GPU_INSTANCE_INTEL(intel::jit::gen_convolution_bwd_weights_t) - GPU_INSTANCE_INTEL_REF(intel::ocl::ref_convolution_bwd_weights_t) + GPU_INSTANCE_REF(intel::ocl::ref_convolution_bwd_weights_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_convolution_bwd_weights_t) GPU_INSTANCE_AMD(amd::miopen_convolution_bwd_weights_t) nullptr, diff --git a/src/gpu/gpu_deconvolution_list.cpp b/src/gpu/gpu_deconvolution_list.cpp index 5e8f07a8c5d..54352acec0e 100644 --- a/src/gpu/gpu_deconvolution_list.cpp +++ b/src/gpu/gpu_deconvolution_list.cpp @@ -15,10 +15,7 @@ *******************************************************************************/ #include "gpu/gpu_impl_list.hpp" - -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/convolution_deconvolution.hpp" -#endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA #include "gpu/nvidia/cudnn_deconvolution.hpp" @@ -39,14 +36,14 @@ using namespace dnnl::impl::prop_kind; const std::map> impl_list_map REG_DECONV_P({ {{forward}, { - GPU_INSTANCE_INTEL(intel::ocl::convolution_deconvolution_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::convolution_deconvolution_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_deconvolution_fwd_t) GPU_INSTANCE_AMD(amd::miopen_deconvolution_fwd_t) nullptr, }}, {{backward}, REG_BWD_PK({ - GPU_INSTANCE_INTEL(intel::ocl::convolution_deconvolution_bwd_data_t) - GPU_INSTANCE_INTEL(intel::ocl::convolution_deconvolution_bwd_weights_t) + GPU_INSTANCE_GENERIC(intel::ocl::convolution_deconvolution_bwd_data_t) + GPU_INSTANCE_GENERIC(intel::ocl::convolution_deconvolution_bwd_weights_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_deconvolution_bwd_data_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_deconvolution_bwd_weights_t) GPU_INSTANCE_AMD(amd::miopen_deconvolution_bwd_data_t) diff --git a/src/gpu/gpu_eltwise_list.cpp b/src/gpu/gpu_eltwise_list.cpp index 9250e1c43a6..30cfc62af0f 100644 --- a/src/gpu/gpu_eltwise_list.cpp +++ b/src/gpu/gpu_eltwise_list.cpp @@ -15,10 +15,10 @@ *******************************************************************************/ #include "gpu/gpu_impl_list.hpp" +#include "gpu/intel/ocl/ref_eltwise.hpp" #if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/gen9_eltwise.hpp" -#include "gpu/intel/ocl/ref_eltwise.hpp" #endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA @@ -42,7 +42,7 @@ const std::map> impl_list_map REG_ELTWISE_P({ {{forward}, { GPU_INSTANCE_INTEL(intel::ocl::gen9_eltwise_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_eltwise_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_eltwise_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_eltwise_fwd_t) GPU_INSTANCE_AMD(amd::miopen_eltwise_fwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_sycl_eltwise_fwd_t) @@ -50,7 +50,7 @@ const std::map> }}, {{backward}, REG_BWD_PK({ GPU_INSTANCE_INTEL(intel::ocl::gen9_eltwise_bwd_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_eltwise_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_eltwise_bwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_eltwise_bwd_t) GPU_INSTANCE_AMD(amd::miopen_eltwise_bwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_sycl_eltwise_bwd_t) diff --git a/src/gpu/gpu_gemm_list.cpp b/src/gpu/gpu_gemm_list.cpp index 08358253269..2a3ea0a368f 100644 --- a/src/gpu/gpu_gemm_list.cpp +++ b/src/gpu/gpu_gemm_list.cpp @@ -17,14 +17,14 @@ #include "common/compiler_workarounds.hpp" #include "gpu/gpu_impl_list.hpp" +#include "gpu/intel/ocl/gemm/gemm_with_post_ops.hpp" +#include "gpu/intel/ocl/gemm/ref_gemm.hpp" #if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/jit/binary_format.hpp" #include "gpu/intel/jit/gemm/gen_gemm.hpp" #include "gpu/intel/jit/gemm/xe_hp_systolic_gemm.hpp" -#include "gpu/intel/ocl/gemm/gemm_with_post_ops.hpp" -#include "gpu/intel/ocl/gemm/ref_gemm.hpp" #ifdef DNNL_DEV_MODE #include "gpu/intel/ocl/gemm/conv_gemm.hpp" @@ -44,7 +44,7 @@ constexpr impl_list_item_t impl_list[] = { GPU_INSTANCE_INTEL(intel::jit::xe_hp_systolic_gemm_t) GPU_INSTANCE_INTEL(intel::ocl::gemm_with_post_ops_t) GPU_INSTANCE_INTEL(intel::jit::gen_gemm_t) - GPU_INSTANCE_INTEL_REF(intel::ocl::ref_gemm_t) + GPU_INSTANCE_REF(intel::ocl::ref_gemm_t) nullptr, }; // clang-format on diff --git a/src/gpu/gpu_group_normalization_list.cpp b/src/gpu/gpu_group_normalization_list.cpp index 65e3c4c6e2e..1b0bdafb136 100644 --- a/src/gpu/gpu_group_normalization_list.cpp +++ b/src/gpu/gpu_group_normalization_list.cpp @@ -15,10 +15,7 @@ *******************************************************************************/ #include "gpu/gpu_impl_list.hpp" - -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/ref_group_normalization.hpp" -#endif namespace dnnl { namespace impl { @@ -31,12 +28,12 @@ using namespace dnnl::impl::prop_kind; const std::map> impl_list_map REG_GNORM_P({ {{forward}, { - GPU_INSTANCE_INTEL(intel::ocl::ref_group_normalization_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_group_normalization_fwd_t) nullptr, } }, {{backward}, REG_BWD_PK({ - GPU_INSTANCE_INTEL(intel::ocl::ref_group_normalization_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_group_normalization_bwd_t) nullptr, }) }, diff --git a/src/gpu/gpu_impl_list.hpp b/src/gpu/gpu_impl_list.hpp index 992ac436428..b7180cb30a9 100644 --- a/src/gpu/gpu_impl_list.hpp +++ b/src/gpu/gpu_impl_list.hpp @@ -150,10 +150,9 @@ namespace gpu { // Instance macros that are enabled only when REF is disabled #ifdef DNNL_DISABLE_GPU_REF_KERNELS -#define GPU_INSTANCE_INTEL_REF(...) +#define GPU_INSTANCE_REF(...) #else -#define GPU_INSTANCE_INTEL_REF(...) \ - DNNL_GPU_INTEL_ONLY(GPU_INSTANCE(__VA_ARGS__)) +#define GPU_INSTANCE_REF(...) GPU_INSTANCE(__VA_ARGS__) #endif #define DECLARE_IMPL_LIST(kind) \ diff --git a/src/gpu/gpu_inner_product_list.cpp b/src/gpu/gpu_inner_product_list.cpp index b13f990a9a5..f32d521c56d 100644 --- a/src/gpu/gpu_inner_product_list.cpp +++ b/src/gpu/gpu_inner_product_list.cpp @@ -15,12 +15,12 @@ *******************************************************************************/ #include "gpu/gpu_impl_list.hpp" +#include "gpu/intel/ocl/convolution_inner_product.hpp" +#include "gpu/intel/ocl/ref_inner_product.hpp" #if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL -#include "gpu/intel/ocl/convolution_inner_product.hpp" #include "gpu/intel/ocl/gemm_inner_product.hpp" #include "gpu/intel/ocl/gemm_post_ops_inner_product.hpp" -#include "gpu/intel/ocl/ref_inner_product.hpp" #endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA @@ -45,7 +45,7 @@ const std::map> {{forward}, { GPU_INSTANCE_INTEL(intel::ocl::gemm_inner_product_fwd_t) GPU_INSTANCE_INTEL(intel::ocl::convolution_inner_product_fwd_t) - GPU_INSTANCE_INTEL_REF(intel::ocl::ref_inner_product_fwd_t) + GPU_INSTANCE_REF(intel::ocl::ref_inner_product_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_gemm_inner_product_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_conv_inner_product_fwd_t) GPU_INSTANCE_AMD(amd::miopen_gemm_inner_product_fwd_t) @@ -54,8 +54,8 @@ const std::map> {{backward}, REG_BWD_PK({ GPU_INSTANCE_INTEL(intel::ocl::gemm_inner_product_bwd_data_t) GPU_INSTANCE_INTEL(intel::ocl::gemm_inner_product_bwd_weights_t) - GPU_INSTANCE_INTEL_REF(intel::ocl::ref_inner_product_bwd_data_t) - GPU_INSTANCE_INTEL_REF(intel::ocl::ref_inner_product_bwd_weights_t) + GPU_INSTANCE_REF(intel::ocl::ref_inner_product_bwd_data_t) + GPU_INSTANCE_REF(intel::ocl::ref_inner_product_bwd_weights_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_gemm_inner_product_bwd_data_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_gemm_inner_product_bwd_weights_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_conv_inner_product_bwd_data_t) diff --git a/src/gpu/gpu_layer_normalization_list.cpp b/src/gpu/gpu_layer_normalization_list.cpp index 70fa92bd7e8..3e427eab763 100644 --- a/src/gpu/gpu_layer_normalization_list.cpp +++ b/src/gpu/gpu_layer_normalization_list.cpp @@ -16,12 +16,10 @@ #include "gpu/gpu_impl_list.hpp" -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/ref_layer_normalization.hpp" #include "gpu/intel/ocl/reusable_lnorm.hpp" #include "gpu/intel/ocl/reusable_vectorized_lnorm.hpp" #include "gpu/intel/ocl/vectorized_lnorm.hpp" -#endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA #include "gpu/generic/sycl/ref_layer_normalizations.hpp" @@ -38,17 +36,17 @@ using namespace dnnl::impl::prop_kind; const std::map> impl_list_map REG_LNORM_P({ {{forward}, { - GPU_INSTANCE_INTEL(intel::ocl::reusable_vectorized_layer_normalization_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::vectorized_lnorm_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_layer_normalization_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::reusable_layer_normalization_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::reusable_vectorized_layer_normalization_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::vectorized_lnorm_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_layer_normalization_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::reusable_layer_normalization_fwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_layer_normalization_fwd_t) nullptr, }}, {{backward}, REG_BWD_PK({ - GPU_INSTANCE_INTEL(intel::ocl::vectorized_lnorm_bwd_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_layer_normalization_bwd_t) - GPU_INSTANCE_INTEL(intel::ocl::reusable_layer_normalization_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::vectorized_lnorm_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_layer_normalization_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::reusable_layer_normalization_bwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_layer_normalization_bwd_t) nullptr, })}, diff --git a/src/gpu/gpu_lrn_list.cpp b/src/gpu/gpu_lrn_list.cpp index 40a6dd688c7..8d339037238 100644 --- a/src/gpu/gpu_lrn_list.cpp +++ b/src/gpu/gpu_lrn_list.cpp @@ -16,9 +16,7 @@ #include "gpu/gpu_impl_list.hpp" -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/ref_lrn.hpp" -#endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA #include "gpu/generic/sycl/ref_lrn.hpp" @@ -40,14 +38,14 @@ using namespace dnnl::impl::prop_kind; const std::map> impl_list_map REG_LRN_P({ {{forward}, { - GPU_INSTANCE_INTEL(intel::ocl::ref_lrn_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_lrn_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_lrn_fwd_t) GPU_INSTANCE_AMD(amd::miopen_lrn_fwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_sycl_lrn_fwd_t) nullptr, }}, {{backward}, REG_BWD_PK({ - GPU_INSTANCE_INTEL(intel::ocl::ref_lrn_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_lrn_bwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_lrn_bwd_t) GPU_INSTANCE_AMD(amd::miopen_lrn_bwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_sycl_lrn_bwd_t) diff --git a/src/gpu/gpu_matmul_list.cpp b/src/gpu/gpu_matmul_list.cpp index 940ce994d49..e586489b4af 100644 --- a/src/gpu/gpu_matmul_list.cpp +++ b/src/gpu/gpu_matmul_list.cpp @@ -15,10 +15,10 @@ *******************************************************************************/ #include "gpu/gpu_impl_list.hpp" +#include "gpu/intel/ocl/ref_matmul.hpp" #if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/gemm_matmul.hpp" -#include "gpu/intel/ocl/ref_matmul.hpp" #endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA @@ -38,7 +38,7 @@ namespace { // clang-format off constexpr impl_list_item_t impl_list[] = REG_MATMUL_P({ GPU_INSTANCE_INTEL(intel::ocl::gemm_matmul_t) - GPU_INSTANCE_INTEL_REF(intel::ocl::ref_matmul_t) + GPU_INSTANCE_REF(intel::ocl::ref_matmul_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_matmul_t) GPU_INSTANCE_AMD(amd::miopen_matmul_t) nullptr, diff --git a/src/gpu/gpu_pooling_list.cpp b/src/gpu/gpu_pooling_list.cpp index 12da2459cc6..afcbb4d26db 100644 --- a/src/gpu/gpu_pooling_list.cpp +++ b/src/gpu/gpu_pooling_list.cpp @@ -17,12 +17,12 @@ #include "common/compiler_workarounds.hpp" #include "gpu/gpu_impl_list.hpp" +#include "gpu/intel/ocl/ref_pooling.hpp" #if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/jit/pooling/gen_pooling.hpp" #include "gpu/intel/ocl/gen9_global_pooling.hpp" #include "gpu/intel/ocl/gen9_pooling.hpp" -#include "gpu/intel/ocl/ref_pooling.hpp" #endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA @@ -48,7 +48,7 @@ const std::map> GPU_INSTANCE_INTEL(intel::jit::gen_pooling_fwd_t) GPU_INSTANCE_INTEL(intel::ocl::gen9_global_pooling_fwd_t) GPU_INSTANCE_INTEL(intel::ocl::gen9_pooling_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_pooling_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_pooling_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_pooling_fwd_t) GPU_INSTANCE_AMD(amd::miopen_pooling_fwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_pooling_fwd_t) @@ -57,7 +57,7 @@ const std::map> {{backward}, REG_BWD_PK({ GPU_INSTANCE_INTEL(intel::ocl::gen9_global_pooling_bwd_t) GPU_INSTANCE_INTEL(intel::ocl::gen9_pooling_bwd_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_pooling_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_pooling_bwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_pooling_bwd_t) GPU_INSTANCE_AMD(amd::miopen_pooling_bwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_pooling_bwd_t) diff --git a/src/gpu/gpu_prelu_list.cpp b/src/gpu/gpu_prelu_list.cpp index a649197a6d2..6961aece191 100644 --- a/src/gpu/gpu_prelu_list.cpp +++ b/src/gpu/gpu_prelu_list.cpp @@ -18,9 +18,7 @@ #include "gpu/gpu_impl_list.hpp" -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/ref_prelu.hpp" -#endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA #include "gpu/generic/sycl/ref_prelu.hpp" @@ -37,12 +35,12 @@ using namespace dnnl::impl::prop_kind; const std::map> impl_list_map REG_PRELU_P({ {{forward}, { - GPU_INSTANCE_INTEL(intel::ocl::ref_prelu_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_prelu_fwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_prelu_fwd_t) nullptr, }}, {{backward}, REG_BWD_PK({ - GPU_INSTANCE_INTEL(intel::ocl::ref_prelu_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_prelu_bwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_prelu_bwd_t) nullptr, })}, diff --git a/src/gpu/gpu_reduction_list.cpp b/src/gpu/gpu_reduction_list.cpp index b29c238e04a..85f8a956fb3 100644 --- a/src/gpu/gpu_reduction_list.cpp +++ b/src/gpu/gpu_reduction_list.cpp @@ -16,12 +16,13 @@ #include "gpu/gpu_impl_list.hpp" -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/reduction/atomic_reduction.hpp" #include "gpu/intel/ocl/reduction/combined_reduction.hpp" #include "gpu/intel/ocl/reduction/ref_reduction.hpp" #include "gpu/intel/ocl/reduction/reusable_ref_reduction.hpp" +#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL + #ifdef DNNL_DEV_MODE #include "gpu/intel/jit/jit_reduction.hpp" #endif @@ -45,10 +46,10 @@ namespace { // clang-format off constexpr impl_list_item_t impl_list[] = REG_REDUCTION_P({ GPU_INSTANCE_INTEL_DEVMODE(intel::jit::jit_reduction_t) - GPU_INSTANCE_INTEL(intel::ocl::atomic_reduction_t) - GPU_INSTANCE_INTEL(intel::ocl::combined_reduction_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_reduction_t) - GPU_INSTANCE_INTEL(intel::ocl::reusable_ref_reduction_t) + GPU_INSTANCE_GENERIC(intel::ocl::atomic_reduction_t) + GPU_INSTANCE_GENERIC(intel::ocl::combined_reduction_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_reduction_t) + GPU_INSTANCE_GENERIC(intel::ocl::reusable_ref_reduction_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_reduction_t) GPU_INSTANCE_AMD(amd::miopen_reduction_t) nullptr, diff --git a/src/gpu/gpu_reorder_list.cpp b/src/gpu/gpu_reorder_list.cpp index be52bad91f5..8dff445259d 100644 --- a/src/gpu/gpu_reorder_list.cpp +++ b/src/gpu/gpu_reorder_list.cpp @@ -18,13 +18,13 @@ #include "gpu/generic/cross_engine_reorder.hpp" #include "gpu/generic/direct_copy.hpp" - -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL -#include "gpu/intel/jit/reorder/gen_reorder.hpp" #include "gpu/intel/ocl/custom_reorder.hpp" #include "gpu/intel/ocl/generic_reorder.hpp" #include "gpu/intel/ocl/ref_reorder.hpp" #include "gpu/intel/ocl/rnn/rnn_reorders.hpp" + +#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL +#include "gpu/intel/jit/reorder/gen_reorder.hpp" #endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA @@ -46,12 +46,12 @@ using namespace dnnl::impl::data_type; // clang-format off constexpr impl_list_item_t impl_list[] = REG_REORDER_P({ - GPU_REORDER_INSTANCE_INTEL(intel::ocl::rnn_weights_reorder_t::pd_t) + GPU_REORDER_INSTANCE_GENERIC(intel::ocl::rnn_weights_reorder_t::pd_t) GPU_REORDER_INSTANCE_GENERIC(generic::direct_copy_t::pd_t) GPU_REORDER_INSTANCE_INTEL(intel::jit::gen_reorder_t::pd_t) - GPU_REORDER_INSTANCE_INTEL(intel::ocl::custom_reorder_t::pd_t) // for specific tensor shapes - GPU_REORDER_INSTANCE_INTEL(intel::ocl::generic_reorder_t::pd_t)// fast and quite generic - GPU_REORDER_INSTANCE_INTEL(intel::ocl::ref_reorder_t::pd_t) // slow but fits every use case + GPU_REORDER_INSTANCE_GENERIC(intel::ocl::custom_reorder_t::pd_t) // for specific tensor shapes + GPU_REORDER_INSTANCE_GENERIC(intel::ocl::generic_reorder_t::pd_t)// fast and quite generic + GPU_REORDER_INSTANCE_GENERIC(intel::ocl::ref_reorder_t::pd_t) // slow but fits every use case GPU_REORDER_INSTANCE_NVIDIA(nvidia::cudnn_reorder_t::pd_t) GPU_REORDER_INSTANCE_AMD(amd::miopen_reorder_t::pd_t) GPU_REORDER_INSTANCE_GENERIC_SYCL(generic::sycl::ref_reorder_t::pd_t) diff --git a/src/gpu/gpu_resampling_list.cpp b/src/gpu/gpu_resampling_list.cpp index e9a623c0438..d33696c4ce8 100644 --- a/src/gpu/gpu_resampling_list.cpp +++ b/src/gpu/gpu_resampling_list.cpp @@ -16,10 +16,8 @@ #include "gpu/gpu_impl_list.hpp" -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/ref_resampling.hpp" #include "gpu/intel/ocl/vectorized_resampling.hpp" -#endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA #include "gpu/generic/sycl/ref_resampling.hpp" @@ -37,14 +35,14 @@ using namespace dnnl::impl::prop_kind; const std::map> impl_list_map REG_RESAMPLING_P({ {{forward}, { - GPU_INSTANCE_INTEL(intel::ocl::ref_resampling_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_resampling_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_resampling_fwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_resampling_fwd_t) nullptr, }}, {{backward}, REG_BWD_PK({ - GPU_INSTANCE_INTEL(intel::ocl::vectorized_resampling_bwd_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_resampling_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::vectorized_resampling_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_resampling_bwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_resampling_bwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_resampling_bwd_t) nullptr, diff --git a/src/gpu/gpu_rnn_list.cpp b/src/gpu/gpu_rnn_list.cpp index 7e9526abf61..0bf74d7a245 100644 --- a/src/gpu/gpu_rnn_list.cpp +++ b/src/gpu/gpu_rnn_list.cpp @@ -16,9 +16,7 @@ #include "gpu/gpu_impl_list.hpp" -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/rnn/rnn_grid.hpp" -#endif namespace dnnl { namespace impl { @@ -31,11 +29,11 @@ using namespace dnnl::impl::prop_kind; const std::map> impl_list_map REG_RNN_P({ {{forward}, { - GPU_INSTANCE_INTEL(intel::ocl::simple_rnn_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::simple_rnn_fwd_t) nullptr, }}, {{backward}, REG_BWD_PK({ - GPU_INSTANCE_INTEL(intel::ocl::simple_rnn_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::simple_rnn_bwd_t) nullptr, })}, }); diff --git a/src/gpu/gpu_sdpa_list.cpp b/src/gpu/gpu_sdpa_list.cpp index 1c9230f95c1..a101c190c8e 100644 --- a/src/gpu/gpu_sdpa_list.cpp +++ b/src/gpu/gpu_sdpa_list.cpp @@ -30,7 +30,7 @@ namespace { // clang-format off constexpr impl_list_item_t impl_list[] = REG_SDPA_P({ GPU_INSTANCE_INTEL(intel::ocl::micro_sdpa_t) - GPU_INSTANCE_INTEL_DEVMODE(intel::ocl::ref_sdpa_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_sdpa_t) nullptr, }); // clang-format on diff --git a/src/gpu/gpu_shuffle_list.cpp b/src/gpu/gpu_shuffle_list.cpp index fe3ef9c2e9c..4f867b987ab 100644 --- a/src/gpu/gpu_shuffle_list.cpp +++ b/src/gpu/gpu_shuffle_list.cpp @@ -16,10 +16,8 @@ #include "gpu/gpu_impl_list.hpp" -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/ref_shuffle.hpp" #include "gpu/intel/ocl/shuffle_by_reorder.hpp" -#endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA #include "gpu/generic/sycl/ref_shuffle.hpp" @@ -33,8 +31,8 @@ namespace { // clang-format off constexpr impl_list_item_t impl_list[] = REG_SHUFFLE_P({ - GPU_INSTANCE_INTEL(intel::ocl::shuffle_by_reorder_t) - GPU_INSTANCE_INTEL(intel::ocl::ref_shuffle_t) + GPU_INSTANCE_GENERIC(intel::ocl::shuffle_by_reorder_t) + GPU_INSTANCE_GENERIC(intel::ocl::ref_shuffle_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_shuffle_t) nullptr, }); diff --git a/src/gpu/gpu_softmax_list.cpp b/src/gpu/gpu_softmax_list.cpp index fbbd595b941..b28d57c1268 100644 --- a/src/gpu/gpu_softmax_list.cpp +++ b/src/gpu/gpu_softmax_list.cpp @@ -15,11 +15,11 @@ *******************************************************************************/ #include "gpu/gpu_impl_list.hpp" +#include "gpu/intel/ocl/reusable_softmax.hpp" +#include "gpu/intel/ocl/simple_softmax.hpp" #if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/gen9_softmax.hpp" -#include "gpu/intel/ocl/reusable_softmax.hpp" -#include "gpu/intel/ocl/simple_softmax.hpp" #endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA @@ -43,8 +43,8 @@ const std::map> impl_list_map REG_SOFTMAX_P({ {{forward}, { GPU_INSTANCE_INTEL(intel::ocl::gen9_softmax_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::simple_softmax_fwd_t) - GPU_INSTANCE_INTEL(intel::ocl::reusable_softmax_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::simple_softmax_fwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::reusable_softmax_fwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_softmax_fwd_t) GPU_INSTANCE_AMD(amd::miopen_softmax_fwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_sycl_softmax_fwd_t) @@ -52,7 +52,7 @@ const std::map> }}, {{backward}, REG_BWD_PK({ GPU_INSTANCE_INTEL(intel::ocl::gen9_softmax_bwd_t) - GPU_INSTANCE_INTEL(intel::ocl::simple_softmax_bwd_t) + GPU_INSTANCE_GENERIC(intel::ocl::simple_softmax_bwd_t) GPU_INSTANCE_NVIDIA(nvidia::cudnn_softmax_bwd_t) GPU_INSTANCE_AMD(amd::miopen_softmax_bwd_t) GPU_INSTANCE_GENERIC_SYCL(generic::sycl::ref_sycl_softmax_bwd_t) diff --git a/src/gpu/gpu_sum_list.cpp b/src/gpu/gpu_sum_list.cpp index acf21625e7c..af05ca44afe 100644 --- a/src/gpu/gpu_sum_list.cpp +++ b/src/gpu/gpu_sum_list.cpp @@ -20,13 +20,13 @@ #include "gpu/gpu_sum_pd.hpp" #include "gpu/generic/ref_sum.hpp" +#include "gpu/intel/ocl/many_inputs_sum.hpp" +#include "gpu/intel/ocl/multi_po_reorder_sum.hpp" +#include "gpu/intel/ocl/simple_sum.hpp" #if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/jit/gen9_simple_sum.hpp" #include "gpu/intel/ocl/gen9_sum.hpp" -#include "gpu/intel/ocl/many_inputs_sum.hpp" -#include "gpu/intel/ocl/multi_po_reorder_sum.hpp" -#include "gpu/intel/ocl/simple_sum.hpp" #endif #if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA @@ -44,10 +44,10 @@ namespace { // clang-format off constexpr impl_list_item_t impl_list[] = REG_SUM_P({ - GPU_SUM_INSTANCE_INTEL(intel::ocl::multi_po_reorder_sum) + GPU_SUM_INSTANCE_GENERIC(intel::ocl::multi_po_reorder_sum) GPU_SUM_INSTANCE_INTEL(intel::ocl::gen9_sum_t) - GPU_SUM_INSTANCE_INTEL(intel::ocl::many_inputs_sum_t) - GPU_SUM_INSTANCE_INTEL(intel::ocl::simple_sum_t) + GPU_SUM_INSTANCE_GENERIC(intel::ocl::many_inputs_sum_t) + GPU_SUM_INSTANCE_GENERIC(intel::ocl::simple_sum_t) GPU_SUM_INSTANCE_NVIDIA(nvidia::cudnn_ref_sum_t) GPU_SUM_INSTANCE_GENERIC(generic::ref_sum_t) nullptr, diff --git a/src/gpu/gpu_zero_pad_list.cpp b/src/gpu/gpu_zero_pad_list.cpp index 4c17827ce72..501d92bfd91 100644 --- a/src/gpu/gpu_zero_pad_list.cpp +++ b/src/gpu/gpu_zero_pad_list.cpp @@ -15,10 +15,7 @@ *******************************************************************************/ #include "gpu/gpu_impl_list.hpp" - -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/simple_zero_pad.hpp" -#endif namespace dnnl { namespace impl { @@ -28,7 +25,7 @@ namespace { // clang-format off constexpr impl_list_item_t impl_list[] = { - GPU_INSTANCE_INTEL(intel::ocl::simple_zero_pad_t) + GPU_INSTANCE_GENERIC(intel::ocl::simple_zero_pad_t) nullptr, }; // clang-format on diff --git a/src/gpu/intel/CMakeLists.txt b/src/gpu/intel/CMakeLists.txt index 994bfb9af39..b3d4b538db9 100644 --- a/src/gpu/intel/CMakeLists.txt +++ b/src/gpu/intel/CMakeLists.txt @@ -19,16 +19,20 @@ file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ) -add_definitions_with_host_compiler(-DNGEN_CPP11) -add_definitions_with_host_compiler(-DNGEN_SAFE) -add_definitions_with_host_compiler(-DNGEN_NEO_INTERFACE) -add_definitions_with_host_compiler(-DNGEN_NO_OP_NAMES) -add_definitions_with_host_compiler(-DNGEN_WINDOWS_COMPAT) - add_subdirectory(compute) -add_subdirectory(microkernels) -add_subdirectory(jit) -add_subdirectory(ocl) +if(DNNL_GPU_VENDOR STREQUAL "INTEL") + add_definitions_with_host_compiler(-DNGEN_CPP11) + add_definitions_with_host_compiler(-DNGEN_SAFE) + add_definitions_with_host_compiler(-DNGEN_NEO_INTERFACE) + add_definitions_with_host_compiler(-DNGEN_NO_OP_NAMES) + add_definitions_with_host_compiler(-DNGEN_WINDOWS_COMPAT) + add_subdirectory(microkernels) + add_subdirectory(jit) +endif() + +if(DNNL_WITH_OCL) + add_subdirectory(ocl) +endif() if(DNNL_WITH_SYCL) add_subdirectory(sycl) diff --git a/src/gpu/intel/compute/device_info.cpp b/src/gpu/intel/compute/device_info.cpp index 237dded47b3..52296eef40f 100644 --- a/src/gpu/intel/compute/device_info.cpp +++ b/src/gpu/intel/compute/device_info.cpp @@ -20,7 +20,9 @@ #include "common/type_helpers.hpp" #include "gpu/intel/compute/device_info.hpp" +#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/jit/utils/ngen_type_bridge.hpp" +#endif #include "gpu/intel/utils.hpp" #ifdef DNNL_WITH_SYCL @@ -129,8 +131,12 @@ int device_info_t::max_subgroup_size(gpu_arch_t gpu_arch) { } int device_info_t::grf_size(gpu_arch_t gpu_arch) { + int size = 0; +#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL ngen::HW hw = jit::convert_dnnl_arch_to_ngen(gpu_arch); - return ngen::GRF::bytes(hw); + size = ngen::GRF::bytes(hw); +#endif + return size; } int device_info_t::min_subgroup_size() const { diff --git a/src/gpu/intel/ocl/CMakeLists.txt b/src/gpu/intel/ocl/CMakeLists.txt index 237eb8ba01e..b4cfda5cd9b 100644 --- a/src/gpu/intel/ocl/CMakeLists.txt +++ b/src/gpu/intel/ocl/CMakeLists.txt @@ -26,6 +26,11 @@ include("${PROJECT_SOURCE_DIR}/cmake/gen_gpu_kernel_list.cmake") file(GLOB_RECURSE CL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cl) file(GLOB_RECURSE CL_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +if(NOT DNNL_GPU_VENDOR STREQUAL "INTEL") + list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/micro_sdpa.cpp") + list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ocl_gpu_hw_info.cpp") +endif() + set(kernel_list_templ "${PROJECT_SOURCE_DIR}/src/gpu/intel/ocl/ocl_kernel_list.cpp.in") set(kernel_list_src "${PROJECT_BINARY_DIR}/src/gpu/intel/ocl/ocl_kernel_list.cpp") diff --git a/src/gpu/intel/ocl/ocl_gpu_device_info.cpp b/src/gpu/intel/ocl/ocl_gpu_device_info.cpp index f79047823d7..2241faaa0fd 100644 --- a/src/gpu/intel/ocl/ocl_gpu_device_info.cpp +++ b/src/gpu/intel/ocl/ocl_gpu_device_info.cpp @@ -42,9 +42,11 @@ status_t ocl_gpu_device_info_t::init_arch(impl::engine_t *engine) { = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err); OCL_CHECK(err); +#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL init_gpu_hw_info(engine, device, context, ip_version_, gpu_arch_, gpu_product_family_, stepping_id_, native_extensions_, mayiuse_systolic_, mayiuse_ngen_kernels_); +#endif err = clReleaseContext(context); OCL_CHECK(err); diff --git a/src/xpu/CMakeLists.txt b/src/xpu/CMakeLists.txt index 8cf11d5c8d2..f4ab64cd3fe 100644 --- a/src/xpu/CMakeLists.txt +++ b/src/xpu/CMakeLists.txt @@ -25,7 +25,7 @@ if(DNNL_WITH_SYCL) add_subdirectory(sycl) endif() -if(DNNL_GPU_VENDOR STREQUAL "INTEL") +if(DNNL_WITH_OCL) add_subdirectory(ocl) endif() diff --git a/src/xpu/ocl/usm_utils.cpp b/src/xpu/ocl/usm_utils.cpp index 06b267a0604..13baac86ee3 100644 --- a/src/xpu/ocl/usm_utils.cpp +++ b/src/xpu/ocl/usm_utils.cpp @@ -15,8 +15,6 @@ *******************************************************************************/ #include "xpu/ocl/usm_utils.hpp" - -#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL #include "gpu/intel/ocl/usm_utils.hpp" #define HANDLE_USM_CALL_V(e, ...) \ @@ -27,10 +25,6 @@ assert(e->kind() == engine_kind::gpu); \ return gpu::intel::ocl::usm::__VA_ARGS__ -#else -#error "Unsupported vendor" -#endif - namespace dnnl { namespace impl { namespace xpu {