diff --git a/bin/hipify-perl b/bin/hipify-perl index d849d0e6..517b60ab 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1550,7 +1550,9 @@ sub rocSubstitutions { subst("cublasCdotu_v2", "rocblas_cdotu", "library"); subst("cublasCdotu_v2_64", "rocblas_cdotu_64", "library"); subst("cublasCgbmv", "rocblas_cgbmv", "library"); + subst("cublasCgbmv_64", "rocblas_cgbmv_64", "library"); subst("cublasCgbmv_v2", "rocblas_cgbmv", "library"); + subst("cublasCgbmv_v2_64", "rocblas_cgbmv_64", "library"); subst("cublasCgeam", "rocblas_cgeam", "library"); subst("cublasCgemm", "rocblas_cgemm", "library"); subst("cublasCgemmBatched", "rocblas_cgemm_batched", "library"); @@ -1659,7 +1661,9 @@ sub rocSubstitutions { subst("cublasDestroy", "rocblas_destroy_handle", "library"); subst("cublasDestroy_v2", "rocblas_destroy_handle", "library"); subst("cublasDgbmv", "rocblas_dgbmv", "library"); + subst("cublasDgbmv_64", "rocblas_dgbmv_64", "library"); subst("cublasDgbmv_v2", "rocblas_dgbmv", "library"); + subst("cublasDgbmv_v2_64", "rocblas_dgbmv_64", "library"); subst("cublasDgeam", "rocblas_dgeam", "library"); subst("cublasDgemm", "rocblas_dgemm", "library"); subst("cublasDgemmBatched", "rocblas_dgemm_batched", "library"); @@ -1839,7 +1843,9 @@ sub rocSubstitutions { subst("cublasSetVector", "rocblas_set_vector", "library"); subst("cublasSetVectorAsync", "rocblas_set_vector_async", "library"); subst("cublasSgbmv", "rocblas_sgbmv", "library"); + subst("cublasSgbmv_64", "rocblas_sgbmv_64", "library"); subst("cublasSgbmv_v2", "rocblas_sgbmv", "library"); + subst("cublasSgbmv_v2_64", "rocblas_sgbmv_64", "library"); subst("cublasSgeam", "rocblas_sgeam", "library"); subst("cublasSgemm", "rocblas_sgemm", "library"); subst("cublasSgemmBatched", "rocblas_sgemm_batched", "library"); @@ -1941,7 +1947,9 @@ sub rocSubstitutions { subst("cublasZdscal_v2", "rocblas_zdscal", "library"); subst("cublasZdscal_v2_64", "rocblas_zdscal_64", "library"); subst("cublasZgbmv", "rocblas_zgbmv", "library"); + subst("cublasZgbmv_64", "rocblas_zgbmv_64", "library"); subst("cublasZgbmv_v2", "rocblas_zgbmv", "library"); + subst("cublasZgbmv_v2_64", "rocblas_zgbmv_64", "library"); subst("cublasZgeam", "rocblas_zgeam", "library"); subst("cublasZgemm", "rocblas_zgemm", "library"); subst("cublasZgemmBatched", "rocblas_zgemm_batched", "library"); @@ -12479,8 +12487,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgemm3m", "cublasZgelsBatched", "cublasZgeam_64", - "cublasZgbmv_v2_64", - "cublasZgbmv_64", "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", @@ -12554,8 +12560,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgemmBatched_64", "cublasSgelsBatched", "cublasSgeam_64", - "cublasSgbmv_v2_64", - "cublasSgbmv_64", "cublasSetVector_64", "cublasSetVectorAsync_64", "cublasSetSmCountTarget", @@ -12724,8 +12728,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgemmBatched_64", "cublasDgelsBatched", "cublasDgeam_64", - "cublasDgbmv_v2_64", - "cublasDgbmv_64", "cublasDdgmm_64", "cublasCtrttp", "cublasCtrsv_v2_64", @@ -12819,8 +12821,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgemm3m", "cublasCgelsBatched", "cublasCgeam_64", - "cublasCgbmv_v2_64", - "cublasCgbmv_64", "cublasCdgmm_64", "cublasAsumEx_64", "cublasAsumEx", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 187eac7f..970ad256 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -723,9 +723,9 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|**ROC**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cublasCgbmv`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | |`rocblas_cgbmv`|3.5.0| | | | | -|`cublasCgbmv_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgbmv_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | | |`rocblas_cgbmv_64`|6.2.0| | | | | |`cublasCgbmv_v2`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | |`rocblas_cgbmv`|3.5.0| | | | | -|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | | |`rocblas_cgbmv_64`|6.2.0| | | | | |`cublasCgemv`| | | | |`hipblasCgemv_v2`|6.0.0| | | | |`rocblas_cgemv`|1.5.0| | | | | |`cublasCgemv_64`|12.0| | | |`hipblasCgemv_v2_64`|6.2.0| | | | | | | | | | | |`cublasCgemv_v2`| | | | |`hipblasCgemv_v2`|6.0.0| | | | |`rocblas_cgemv`|1.5.0| | | | | @@ -803,9 +803,9 @@ |`cublasCtrsv_v2`| | | | |`hipblasCtrsv_v2`|6.0.0| | | | |`rocblas_ctrsv`|3.5.0| | | | | |`cublasCtrsv_v2_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| | | | | | | | | | | |`cublasDgbmv`| | | | |`hipblasDgbmv`|3.5.0| | | | |`rocblas_dgbmv`|3.5.0| | | | | -|`cublasDgbmv_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | | | | | | | | | +|`cublasDgbmv_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | | |`rocblas_dgbmv_64`|6.2.0| | | | | |`cublasDgbmv_v2`| | | | |`hipblasDgbmv`|3.5.0| | | | |`rocblas_dgbmv`|3.5.0| | | | | -|`cublasDgbmv_v2_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | | | | | | | | | +|`cublasDgbmv_v2_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | | |`rocblas_dgbmv_64`|6.2.0| | | | | |`cublasDgemv`| | | | |`hipblasDgemv`|1.8.2| | | | |`rocblas_dgemv`|1.5.0| | | | | |`cublasDgemv_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | | | | | | | | | |`cublasDgemv_v2`| | | | |`hipblasDgemv`|1.8.2| | | | |`rocblas_dgemv`|1.5.0| | | | | @@ -867,9 +867,9 @@ |`cublasDtrsv_v2`| | | | |`hipblasDtrsv`|3.0.0| | | | |`rocblas_dtrsv`|3.5.0| | | | | |`cublasDtrsv_v2_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | | | | | | | | | |`cublasSgbmv`| | | | |`hipblasSgbmv`|3.5.0| | | | |`rocblas_sgbmv`|3.5.0| | | | | -|`cublasSgbmv_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | | | | | | | | | +|`cublasSgbmv_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | | |`rocblas_sgbmv_64`|6.2.0| | | | | |`cublasSgbmv_v2`| | | | |`hipblasSgbmv`|3.5.0| | | | |`rocblas_sgbmv`|3.5.0| | | | | -|`cublasSgbmv_v2_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | | | | | | | | | +|`cublasSgbmv_v2_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | | |`rocblas_sgbmv_64`|6.2.0| | | | | |`cublasSgemv`| | | | |`hipblasSgemv`|1.8.2| | | | |`rocblas_sgemv`|1.5.0| | | | | |`cublasSgemv_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | | | | | | | | | |`cublasSgemv_v2`| | | | |`hipblasSgemv`|1.8.2| | | | |`rocblas_sgemv`|1.5.0| | | | | @@ -931,9 +931,9 @@ |`cublasStrsv_v2`| | | | |`hipblasStrsv`|3.0.0| | | | |`rocblas_strsv`|3.5.0| | | | | |`cublasStrsv_v2_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | | | | | | | | | |`cublasZgbmv`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | |`rocblas_zgbmv`|3.5.0| | | | | -|`cublasZgbmv_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgbmv_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | | |`rocblas_zgbmv_64`|6.2.0| | | | | |`cublasZgbmv_v2`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | |`rocblas_zgbmv`|3.5.0| | | | | -|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | | |`rocblas_zgbmv_64`|6.2.0| | | | | |`cublasZgemv`| | | | |`hipblasZgemv_v2`|6.0.0| | | | |`rocblas_zgemv`|1.5.0| | | | | |`cublasZgemv_64`|12.0| | | |`hipblasZgemv_v2_64`|6.2.0| | | | | | | | | | | |`cublasZgemv_v2`| | | | |`hipblasZgemv_v2`|6.0.0| | | | |`rocblas_zgemv`|1.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 7f509495..22962374 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -723,9 +723,9 @@ |**CUDA**|**A**|**D**|**C**|**R**|**ROC**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cublasCgbmv`| | | | |`rocblas_cgbmv`|3.5.0| | | | | -|`cublasCgbmv_64`|12.0| | | | | | | | | | +|`cublasCgbmv_64`|12.0| | | |`rocblas_cgbmv_64`|6.2.0| | | | | |`cublasCgbmv_v2`| | | | |`rocblas_cgbmv`|3.5.0| | | | | -|`cublasCgbmv_v2_64`|12.0| | | | | | | | | | +|`cublasCgbmv_v2_64`|12.0| | | |`rocblas_cgbmv_64`|6.2.0| | | | | |`cublasCgemv`| | | | |`rocblas_cgemv`|1.5.0| | | | | |`cublasCgemv_64`|12.0| | | | | | | | | | |`cublasCgemv_v2`| | | | |`rocblas_cgemv`|1.5.0| | | | | @@ -803,9 +803,9 @@ |`cublasCtrsv_v2`| | | | |`rocblas_ctrsv`|3.5.0| | | | | |`cublasCtrsv_v2_64`|12.0| | | | | | | | | | |`cublasDgbmv`| | | | |`rocblas_dgbmv`|3.5.0| | | | | -|`cublasDgbmv_64`|12.0| | | | | | | | | | +|`cublasDgbmv_64`|12.0| | | |`rocblas_dgbmv_64`|6.2.0| | | | | |`cublasDgbmv_v2`| | | | |`rocblas_dgbmv`|3.5.0| | | | | -|`cublasDgbmv_v2_64`|12.0| | | | | | | | | | +|`cublasDgbmv_v2_64`|12.0| | | |`rocblas_dgbmv_64`|6.2.0| | | | | |`cublasDgemv`| | | | |`rocblas_dgemv`|1.5.0| | | | | |`cublasDgemv_64`|12.0| | | | | | | | | | |`cublasDgemv_v2`| | | | |`rocblas_dgemv`|1.5.0| | | | | @@ -867,9 +867,9 @@ |`cublasDtrsv_v2`| | | | |`rocblas_dtrsv`|3.5.0| | | | | |`cublasDtrsv_v2_64`|12.0| | | | | | | | | | |`cublasSgbmv`| | | | |`rocblas_sgbmv`|3.5.0| | | | | -|`cublasSgbmv_64`|12.0| | | | | | | | | | +|`cublasSgbmv_64`|12.0| | | |`rocblas_sgbmv_64`|6.2.0| | | | | |`cublasSgbmv_v2`| | | | |`rocblas_sgbmv`|3.5.0| | | | | -|`cublasSgbmv_v2_64`|12.0| | | | | | | | | | +|`cublasSgbmv_v2_64`|12.0| | | |`rocblas_sgbmv_64`|6.2.0| | | | | |`cublasSgemv`| | | | |`rocblas_sgemv`|1.5.0| | | | | |`cublasSgemv_64`|12.0| | | | | | | | | | |`cublasSgemv_v2`| | | | |`rocblas_sgemv`|1.5.0| | | | | @@ -931,9 +931,9 @@ |`cublasStrsv_v2`| | | | |`rocblas_strsv`|3.5.0| | | | | |`cublasStrsv_v2_64`|12.0| | | | | | | | | | |`cublasZgbmv`| | | | |`rocblas_zgbmv`|3.5.0| | | | | -|`cublasZgbmv_64`|12.0| | | | | | | | | | +|`cublasZgbmv_64`|12.0| | | |`rocblas_zgbmv_64`|6.2.0| | | | | |`cublasZgbmv_v2`| | | | |`rocblas_zgbmv`|3.5.0| | | | | -|`cublasZgbmv_v2_64`|12.0| | | | | | | | | | +|`cublasZgbmv_v2_64`|12.0| | | |`rocblas_zgbmv_64`|6.2.0| | | | | |`cublasZgemv`| | | | |`rocblas_zgemv`|1.5.0| | | | | |`cublasZgemv_64`|12.0| | | | | | | | | | |`cublasZgemv_v2`| | | | |`rocblas_zgemv`|1.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index ddf83fee..6102707c 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -232,13 +232,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GBMV {"cublasSgbmv", {"hipblasSgbmv", "rocblas_sgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSgbmv_64", {"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSgbmv_64", {"hipblasSgbmv_64", "rocblas_sgbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDgbmv", {"hipblasDgbmv", "rocblas_dgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDgbmv_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDgbmv_64", {"hipblasDgbmv_64", "rocblas_dgbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgbmv", {"hipblasCgbmv_v2", "rocblas_cgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgbmv_64", {"hipblasCgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCgbmv_64", {"hipblasCgbmv_v2_64", "rocblas_cgbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgbmv", {"hipblasZgbmv_v2", "rocblas_zgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgbmv_64", {"hipblasZgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgbmv_64", {"hipblasZgbmv_v2_64", "rocblas_zgbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TRMV {"cublasStrmv", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, @@ -650,13 +650,13 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GBMV {"cublasSgbmv_v2", {"hipblasSgbmv", "rocblas_sgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSgbmv_v2_64", {"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSgbmv_v2_64", {"hipblasSgbmv_64", "rocblas_sgbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDgbmv_v2", {"hipblasDgbmv", "rocblas_dgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDgbmv_v2_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDgbmv_v2_64", {"hipblasDgbmv_64", "rocblas_dgbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCgbmv_v2", {"hipblasCgbmv_v2", "rocblas_cgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgbmv_v2_64", {"hipblasCgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCgbmv_v2_64", {"hipblasCgbmv_v2_64", "rocblas_cgbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZgbmv_v2", {"hipblasZgbmv_v2", "rocblas_zgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgbmv_v2_64", {"hipblasZgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZgbmv_v2_64", {"hipblasZgbmv_v2_64", "rocblas_zgbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TRMV {"cublasStrmv_v2", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, @@ -2311,6 +2311,10 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_nrm2_ex_64", {HIP_6010, HIP_0, HIP_0 }}, {"rocblas_rot_ex_64", {HIP_6010, HIP_0, HIP_0 }}, {"rocblas_scal_ex_64", {HIP_6010, HIP_0, HIP_0 }}, + {"rocblas_sgbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dgbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_cgbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zgbmv_64", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 1f9d5dd3..c336a02b 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -153,8 +153,10 @@ int main() { int64_t n_64 = 0; int nrhs = 0; int m = 0; + int64_t m_64 = 0; int num = 0; int lda = 0; + int64_t lda_64 = 0; int ldb = 0; int ldc = 0; int res = 0; @@ -165,7 +167,9 @@ int main() { int64_t incy_64 = 0; int k = 0; int kl = 0; + int64_t kl_64 = 0; int ku = 0; + int64_t ku_64 = 0; int batchCount = 0; void *image = nullptr; void *image_2 = nullptr; @@ -240,6 +244,7 @@ int main() { float fa = 0; float fA = 0; + float fAP = 0.0f; float fb = 0; float fB = 0; float fx = 0; @@ -2302,6 +2307,34 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_scal_ex_64(rocblas_handle handle, int64_t n, const void* alpha, rocblas_datatype alpha_type, void* x, rocblas_datatype x_type, int64_t incx, rocblas_datatype execution_type); // CHECK: blasStatus = rocblas_scal_ex_64(blasHandle, n_64, valpha, alpha_type, vx, x_type, incx_64, execution_type); blasStatus = cublasScalEx_64(blasHandle, n_64, valpha, alpha_type, vx, x_type, incx_64, execution_type); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgbmv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgbmv_64(rocblas_handle handle, rocblas_operation trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // CHECK: blasStatus = rocblas_sgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_sgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgbmv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const double* alpha, const double* A, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgbmv_64(rocblas_handle handle, rocblas_operation trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const double* alpha, const double* A, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // CHECK: blasStatus = rocblas_dgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_dgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgbmv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgbmv_64(rocblas_handle handle, rocblas_operation trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* beta, rocblas_float_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_cgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_cgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgbmv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgbmv_64(rocblas_handle handle, rocblas_operation trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* beta, rocblas_double_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_zgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_zgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); #endif return 0;