From 69e3981bdf03a83c83106d199fc1fe4aa2c498eb Mon Sep 17 00:00:00 2001 From: tvukovic-amd <127323445+tvukovic-amd@users.noreply.github.com> Date: Thu, 9 May 2024 22:56:09 +0200 Subject: [PATCH] Remove rocblas on windows (#2966) --- CMakeLists.txt | 8 ++- src/CMakeLists.txt | 5 +- src/targets/gpu/CMakeLists.txt | 68 ++++++++++++------- src/targets/gpu/compile_miopen.cpp | 3 +- src/targets/gpu/fuse_mlir.cpp | 2 +- src/targets/gpu/fuse_ops.cpp | 4 ++ src/targets/gpu/gemm_impl.cpp | 4 +- .../include/migraphx/gpu/compile_miopen.hpp | 2 +- .../gpu/include/migraphx/gpu/context.hpp | 11 +-- src/targets/gpu/include/migraphx/gpu/gemm.hpp | 5 +- .../gpu/include/migraphx/gpu/rocblas.hpp | 7 +- src/targets/gpu/lowering.cpp | 7 +- src/targets/gpu/rocblas.cpp | 4 +- src/targets/gpu/target.cpp | 2 + test/gpu/gemm_tune.cpp | 4 +- 15 files changed, 89 insertions(+), 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 72cefdb6c88..69b433e871a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,6 +61,8 @@ else() option(MIGRAPHX_ENABLE_PYTHON "Enable python bindings" ON) endif() +option(MIGRAPHX_USE_ROCBLAS "Enable MIGraphX to use rocBLAS" ON) + # By default build shared libraries option(BUILD_SHARED_LIBS "Create shared libraries" ON) @@ -334,11 +336,15 @@ else() set(DEPENDS_HIP_RUNTIME "hip-runtime-amd" ) endif() +if(MIGRAPHX_USE_ROCBLAS) + list(APPEND PACKAGE_DEPENDS rocblas) +endif() + rocm_create_package( NAME MIGraphX DESCRIPTION "AMD's graph optimizer" MAINTAINER "AMDMIGraphX Maintainer " LDCONFIG PTH - DEPENDS miopen-hip rocblas ${DEPENDS_HIP_RUNTIME} hip-base half ${PACKAGE_DEPENDS} + DEPENDS miopen-hip ${DEPENDS_HIP_RUNTIME} hip-base half ${PACKAGE_DEPENDS} ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8ca3cdbb71b..eedc7a27501 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -331,7 +331,10 @@ target_link_libraries(migraphx_all_targets INTERFACE migraphx_cpu) target_compile_definitions(migraphx_all_targets INTERFACE -DHAVE_CPU) endif() if(MIGRAPHX_ENABLE_GPU) -list(APPEND MIGRAPHX_CONFIG_DEPENDS PACKAGE MIOpen PACKAGE rocblas) + if(MIGRAPHX_USE_ROCBLAS) + list(APPEND MIGRAPHX_CONFIG_DEPENDS PACKAGE rocblas) + endif() + list(APPEND MIGRAPHX_CONFIG_DEPENDS PACKAGE MIOpen) add_subdirectory(targets/gpu) target_link_libraries(migraphx_all_targets INTERFACE migraphx_gpu) target_compile_definitions(migraphx_all_targets INTERFACE -DHAVE_GPU) diff --git a/src/targets/gpu/CMakeLists.txt b/src/targets/gpu/CMakeLists.txt index 5d9fbabb3e2..d2e73ef1de7 100644 --- a/src/targets/gpu/CMakeLists.txt +++ b/src/targets/gpu/CMakeLists.txt @@ -33,9 +33,13 @@ endif() find_package(miopen REQUIRED) message(STATUS "MIGraphX is using MIOpen") -# rocblas -find_package(rocblas REQUIRED) -message(STATUS "MIGraphX build with rocBLAS") +if(MIGRAPHX_USE_ROCBLAS) + # rocblas + find_package(rocblas REQUIRED) + message(STATUS "MIGraphX build with rocBLAS") +else() + message(STATUS "MIGraphX build without rocBLAS") +endif() if(MIGRAPHX_USE_COMPOSABLEKERNEL) find_package(composable_kernel 1.0.0 REQUIRED COMPONENTS jit_library) @@ -189,10 +193,12 @@ register_op(migraphx_gpu HEADER migraphx/gpu/rnn_variable_seq_lens.hpp OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output INCLUDES migraphx/gpu/context.hpp) -register_op(migraphx_gpu - HEADER migraphx/gpu/gemm.hpp - OPERATORS gpu::rocblas_gemm gpu::rocblas_gemm - INCLUDES migraphx/gpu/context.hpp) +if(MIGRAPHX_USE_ROCBLAS) + register_op(migraphx_gpu + HEADER migraphx/gpu/gemm.hpp + OPERATORS gpu::rocblas_gemm gpu::rocblas_gemm + INCLUDES migraphx/gpu/context.hpp) +endif() register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp OPERATORS gpu::miopen_convolution gpu::miopen_convolution gpu::miopen_convolution INCLUDES migraphx/gpu/context.hpp) @@ -260,13 +266,19 @@ target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_CXX_COMPILER="${CMAKE_CX include(CheckLibraryExists) get_target_property(MIOPEN_LOCATION MIOpen LOCATION) -get_target_property(ROCBLAS_LOCATION roc::rocblas LOCATION) -check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API) check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API) -# Beta API for automated GEMM tuning -check_library_exists(roc::rocblas "rocblas_gemm_ex_get_solutions" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_TUNING_BETA_FEATURE_API) -# rocblas FP8 API -check_library_exists(roc::rocblas "rocblas_gemm_strided_batched_ex3" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_FP8_BETA_API) +check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API) + +if(MIGRAPHX_USE_ROCBLAS) + get_target_property(ROCBLAS_LOCATION roc::rocblas LOCATION) + target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_ROCBLAS=1) + # Beta API for automated GEMM tuning + check_library_exists(roc::rocblas "rocblas_gemm_ex_get_solutions" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_TUNING_BETA_FEATURE_API) + # rocblas FP8 API + check_library_exists(roc::rocblas "rocblas_gemm_strided_batched_ex3" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_FP8_BETA_API) +else() + target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_ROCBLAS=0) +endif() set(MIGRAPHX_USE_FIND_2_API "${HAS_FIND_2_API}" CACHE BOOL "") @@ -289,21 +301,27 @@ else() message(STATUS "MIOpen does not have find mode api") endif() -if(HAS_ROCBLAS_TUNING_BETA_FEATURE_API) - target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_TUNING_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS) - message(STATUS "MIGraphx is using Beta API of rocBLAS") -else() - message(STATUS "rocBLAS does not have User Tuning Beta API") -endif() +if(MIGRAPHX_USE_ROCBLAS) + if(HAS_ROCBLAS_TUNING_BETA_FEATURE_API) + target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_TUNING_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS) + message(STATUS "MIGraphx is using Beta API of rocBLAS") + else() + message(STATUS "rocBLAS does not have User Tuning Beta API") + endif() -if(HAS_ROCBLAS_FP8_BETA_API) - target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_FP8_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS) - message(STATUS "MIGraphX is using Beta API of rocBLAS for FP8 computations") -else() - message(STATUS "rocBLAS does not have Fp8 Beta API") + if(HAS_ROCBLAS_FP8_BETA_API) + target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_FP8_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS) + message(STATUS "MIGraphX is using Beta API of rocBLAS for FP8 computations") + else() + message(STATUS "rocBLAS does not have Fp8 Beta API") + endif() + + + target_link_libraries(migraphx_gpu PUBLIC roc::rocblas) endif() -target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas) +target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen) + target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels) if(MIGRAPHX_USE_COMPOSABLEKERNEL) target_link_libraries(migraphx_gpu PRIVATE composable_kernel::jit_library) diff --git a/src/targets/gpu/compile_miopen.cpp b/src/targets/gpu/compile_miopen.cpp index ce1583e8451..583601bdda1 100644 --- a/src/targets/gpu/compile_miopen.cpp +++ b/src/targets/gpu/compile_miopen.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -29,7 +29,6 @@ #include #include #include -#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { diff --git a/src/targets/gpu/fuse_mlir.cpp b/src/targets/gpu/fuse_mlir.cpp index db074c94b92..8a4637d6e0c 100644 --- a/src/targets/gpu/fuse_mlir.cpp +++ b/src/targets/gpu/fuse_mlir.cpp @@ -580,7 +580,7 @@ void fuse_mlir::apply(module_pass_manager& mpm) const { #ifdef MIGRAPHX_MLIR const auto& device_name = ctx == nullptr ? "" : ctx->get_current_device().get_gfx_name(); - const bool is_navi = starts_with(device_name, "gfx110"); + const bool is_navi = starts_with(device_name, "gfx11"); auto get_mode = [&](std::string_view option, mlir_mode m1, mlir_mode m2 = mlir_mode::fast) { if(specific_op(option)) diff --git a/src/targets/gpu/fuse_ops.cpp b/src/targets/gpu/fuse_ops.cpp index e3a0aa06393..d97712a992e 100644 --- a/src/targets/gpu/fuse_ops.cpp +++ b/src/targets/gpu/fuse_ops.cpp @@ -550,6 +550,7 @@ struct find_conv_pointwise } }; +#if MIGRAPHX_USE_ROCBLAS struct find_gemm_pointwise { auto matcher() const @@ -675,6 +676,7 @@ struct find_gemm_pointwise m.replace_instruction(ins, gemm, inputs); } }; +#endif struct find_contiguous_tranpose_gemm { @@ -893,7 +895,9 @@ void fuse_ops::apply(module& m) const match::find_matches(m, find_conv_pointwise{ctx}, find_conv_bias_relu{ctx}, find_conv_bias{ctx}); run_passes(m, {dead_code_elimination{}}); match::find_matches(m, +#if MIGRAPHX_USE_ROCBLAS find_gemm_pointwise{}, +#endif find_layernorm_pointwise{}, find_concat_pointwise{}, find_contiguous_tranpose_gemm{}, diff --git a/src/targets/gpu/gemm_impl.cpp b/src/targets/gpu/gemm_impl.cpp index 1dc7ac4280e..fbe49eb6797 100644 --- a/src/targets/gpu/gemm_impl.cpp +++ b/src/targets/gpu/gemm_impl.cpp @@ -36,7 +36,7 @@ using microseconds = std::chrono::duration; namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { namespace gpu { - +#if MIGRAPHX_USE_ROCBLAS /* Regular rocBLAS API takes compute_type as `rocblas_datatype` enum value v/s "ex3" BETA API takes it as `rocblas_computetype` enum value. `rb_compute_type` is faciliator to implictly cast integer enum @@ -678,7 +678,7 @@ int32_t gemm_finalize(context& ctx, return gemm_finalize_impl( ctx, output_shape, input_shapes, alpha, beta, compute_fp32, solution_idx); } - +#endif } // namespace gpu } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx diff --git a/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp b/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp index 197560d2cf1..03dd669e536 100644 --- a/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp +++ b/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp index ef1d6259a4d..457e3712f87 100644 --- a/src/targets/gpu/include/migraphx/gpu/context.hpp +++ b/src/targets/gpu/include/migraphx/gpu/context.hpp @@ -107,7 +107,7 @@ struct hip_device assert(mihandle.get() != nullptr); return mihandle.get(); } - +#if MIGRAPHX_USE_ROCBLAS auto get_rocblas() { setup(); @@ -116,6 +116,7 @@ struct hip_device assert(rbhandle.get() != nullptr); return rbhandle.get(); } +#endif void wait() const { @@ -144,10 +145,12 @@ struct hip_device } private: - std::size_t id = 0; - shared s = nullptr; - shared mihandle = nullptr; + std::size_t id = 0; + shared s = nullptr; + shared mihandle = nullptr; +#if MIGRAPHX_USE_ROCBLAS shared rbhandle = nullptr; +#endif }; void add_stream() { streams.emplace_back(device_id); } diff --git a/src/targets/gpu/include/migraphx/gpu/gemm.hpp b/src/targets/gpu/include/migraphx/gpu/gemm.hpp index 321662888bf..f4ec57760a8 100644 --- a/src/targets/gpu/include/migraphx/gpu/gemm.hpp +++ b/src/targets/gpu/include/migraphx/gpu/gemm.hpp @@ -52,7 +52,6 @@ struct rocblas_gemm bool compute_fp32 = false; unsigned trans_batch = 0; int32_t solution_idx = 0; - template static auto reflect(Self& self, F f) { @@ -158,9 +157,7 @@ struct rocblas_gemm #endif } }; - +#endif } // namespace gpu } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx - -#endif diff --git a/src/targets/gpu/include/migraphx/gpu/rocblas.hpp b/src/targets/gpu/include/migraphx/gpu/rocblas.hpp index e72666e25ae..a03fa8385d2 100644 --- a/src/targets/gpu/include/migraphx/gpu/rocblas.hpp +++ b/src/targets/gpu/include/migraphx/gpu/rocblas.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -25,11 +25,14 @@ #define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP #include #include +#if MIGRAPHX_USE_ROCBLAS #include +#endif namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { namespace gpu { +#if MIGRAPHX_USE_ROCBLAS using rocblas_handle_ptr = MIGRAPHX_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle); @@ -41,7 +44,7 @@ struct context; MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag(); MIGRAPHX_GPU_EXPORT bool rocblas_fp8_available(); - +#endif } // namespace gpu } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx diff --git a/src/targets/gpu/lowering.cpp b/src/targets/gpu/lowering.cpp index 51d924f6a5f..6c87d8fdc45 100644 --- a/src/targets/gpu/lowering.cpp +++ b/src/targets/gpu/lowering.cpp @@ -82,8 +82,9 @@ struct miopen_apply { assert(mod != nullptr); assert(pass != nullptr); - +#if MIGRAPHX_USE_ROCBLAS compute_fp32 = get_compute_fp32_flag(); +#endif offload_copy = (mod == mpm->get_root_module()) ? pass->offload_copy : false; add_generic_op("contiguous"); @@ -104,8 +105,10 @@ struct miopen_apply add_convolution_op("convolution"); add_convolution_op("convolution_backwards"); add_convolution_op("quant_convolution"); +#if MIGRAPHX_USE_ROCBLAS add_gemm_op("dot"); add_gemm_op("quant_dot"); +#endif add_if_op(); add_loop_op(); add_neg_op(); @@ -232,6 +235,7 @@ struct miopen_apply return mod->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}})); } +#if MIGRAPHX_USE_ROCBLAS template void add_gemm_op(const std::string& name) { @@ -243,6 +247,7 @@ struct miopen_apply return mod->replace_instruction(ins, rocblas_gemm{Op{}, 1, 0, compute_fp32}, refs); }); } +#endif void add_convolution_op(const std::string& name) { diff --git a/src/targets/gpu/rocblas.cpp b/src/targets/gpu/rocblas.cpp index 1b37f08e1ed..5779f7ae6e1 100644 --- a/src/targets/gpu/rocblas.cpp +++ b/src/targets/gpu/rocblas.cpp @@ -32,7 +32,7 @@ namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { namespace gpu { - +#if MIGRAPHX_USE_ROCBLAS rocblas_handle_ptr create_rocblas_handle_ptr() { // add a call to rocblas_initialize() to workaround a rocblas bug SWDEV-438929 @@ -63,7 +63,7 @@ bool rocblas_fp8_available() return gfx_has_fp8_intrinsics(); #endif } - +#endif } // namespace gpu } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx diff --git a/src/targets/gpu/target.cpp b/src/targets/gpu/target.cpp index 4a18e25aab5..3264f298f62 100644 --- a/src/targets/gpu/target.cpp +++ b/src/targets/gpu/target.cpp @@ -97,11 +97,13 @@ std::vector target::get_passes(migraphx::context& gctx, const compile_opti unsupported_types.erase(shape::type_t::tuple_type); // whiltelist supported Ops for the FP8 std::set unsupported_fp8_ops = {}; +#if MIGRAPHX_USE_ROCBLAS if(not gpu::rocblas_fp8_available()) { unsupported_fp8_ops.insert("dot"); unsupported_fp8_ops.insert("quant_dot"); } +#endif // MIOpen doesn't have support for fp8 pooling yet. unsupported_fp8_ops.insert("pooling"); if(not gpu::gfx_has_fp8_intrinsics()) diff --git a/test/gpu/gemm_tune.cpp b/test/gpu/gemm_tune.cpp index d1bd00c5a1c..75081bc37ba 100644 --- a/test/gpu/gemm_tune.cpp +++ b/test/gpu/gemm_tune.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -47,6 +47,7 @@ void run_lowering(migraphx::program& p, bool offload_copy = false) {migraphx::auto_contiguous{}, migraphx::gpu::lowering{&ctx, offload_copy}}); } +#if MIGRAPHX_USE_ROCBLAS /** * Tests the automatic GEMM tuning feature. In the finalize() method of the gemm op, * rocBLAS API functions are called to quickly benchmark all the GEMM solutions @@ -181,6 +182,7 @@ TEST_CASE(gemm_tune_strided_lowered) EXPECT(0 == solution_idx.to()); #endif } +#endif TEST_CASE(gemm_tune_invalid_sol_index) {