diff --git a/CMakeLists.txt b/CMakeLists.txt index 46cf416782..72ef595742 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,7 @@ if(NOT WIN32 AND NOT APPLE) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") endif() -rocm_setup_version(VERSION 3.00.0) +rocm_setup_version(VERSION 3.0.1) list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ) include(TargetFlags) @@ -380,7 +380,7 @@ if(MIOPEN_USE_HIPRTC) message(FATAL_ERROR "HIPRTC can be used only together with COMGR") endif() find_package(hiprtc REQUIRED) - message(STATUS "Build with HIPRTC") + message(STATUS "Build with HIPRTC ${hiprtc_VERSION}") endif() option(Boost_USE_STATIC_LIBS "Use boost static libraries" ON) @@ -761,6 +761,19 @@ enable_cppcheck( knownConditionTrueFalse shadowFunction moduloofone + ################################################################### + # TODO Code Quality WORKAROUND ROCm 6.0 && + # Ubuntu 22.04 && cppcheck 2.12.1 update + ################################################################### + duplInheritedMember + constParameterCallback + constParameterReference + constParameterPointer + constVariableReference + constVariablePointer + useStlAlgorithm + uselessOverride + unusedScopedObject FORCE SOURCES addkernels/ diff --git a/Dockerfile b/Dockerfile index d958879d3c..36e4dc484e 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 as miopen +FROM ubuntu:22.04 as miopen ARG DEBIAN_FRONTEND=noninteractive # Support multiarch @@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg -RUN wget https://repo.radeon.com/amdgpu-install/5.7/ubuntu/focal/amdgpu-install_5.7.50700-1_all.deb --no-check-certificate +RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ - ./amdgpu-install_5.7.50700-1_all.deb + ./amdgpu-install_6.0.60000-1_all.deb # Add rocm repository -RUN export ROCM_APT_VER=5.7;\ +RUN export ROCM_APT_VER=6.0;\ echo $ROCM_APT_VER &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list' -RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list" +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\ +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list' +RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list" RUN amdgpu-install -y --usecase=rocm --no-dkms @@ -94,11 +94,17 @@ RUN rm -rf /tmp/ccache* && mkdir /tmp/ccache && wget https://github.com/ccache/c cd /tmp/ccache-${CCACHE_COMMIT}/build && \ cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/* RUN ccache -s + +# purge existing composable kernel installed with ROCm +# hence cannot use autoremove since it will remove more components +RUN apt-get update && \ +DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \ + composablekernel-dev ARG COMPILER_LAUNCHER="" RUN if [ "$USE_FIN" = "ON" ]; then \ - rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ else \ - rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ fi RUN ccache -s diff --git a/dev-requirements.txt b/dev-requirements.txt index 86ee06ae33..ddc6212455 100755 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,3 +1,3 @@ ROCmSoftwarePlatform/rocm-recipes -f requirements.txt -danmar/cppcheck@2.9 +danmar/cppcheck@2.12.1 diff --git a/docs/DebugAndLogging.md b/docs/DebugAndLogging.md index 3ae5db123a..b1e497efcc 100644 --- a/docs/DebugAndLogging.md +++ b/docs/DebugAndLogging.md @@ -94,7 +94,7 @@ Direct Solutions: * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`. -* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`. +* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1` diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt index 2918a5aa94..64ac494812 100644 --- a/driver/CMakeLists.txt +++ b/driver/CMakeLists.txt @@ -28,6 +28,7 @@ cmake_minimum_required( VERSION 3.5) find_package(Threads REQUIRED) add_executable(MIOpenDriver main.cpp InputFlags.cpp) +target_include_directories(MIOpenDriver PRIVATE ../src/kernels) target_link_libraries(MIOpenDriver MIOpen) target_link_libraries(MIOpenDriver ${CMAKE_THREAD_LIBS_INIT}) if(NOT MIOPEN_EMBED_DB STREQUAL "") diff --git a/driver/driver.hpp b/driver/driver.hpp index 0862652cd5..7a1503ed74 100644 --- a/driver/driver.hpp +++ b/driver/driver.hpp @@ -44,7 +44,7 @@ #include using half = half_float::half; using hip_bfloat16 = bfloat16; -#include +#include using float16 = half_float::half; using float8 = miopen_f8::hip_f8; using bfloat8 = miopen_f8::hip_f8; diff --git a/driver/random.hpp b/driver/random.hpp index b3be81f56e..a55b73cc96 100644 --- a/driver/random.hpp +++ b/driver/random.hpp @@ -109,7 +109,9 @@ inline T gen_subnorm() if constexpr(!std::is_integral_v && !std::is_same_v && details::has_digits::value) { - using BitType = std::conditional_t; + using BitType = std::conditional_t>; static_assert(sizeof(T) == sizeof(BitType)); // -1 because ::digits counts the first implicit digit diff --git a/speedtests/CMakeLists.txt b/speedtests/CMakeLists.txt index 5832f43751..6f8086880a 100644 --- a/speedtests/CMakeLists.txt +++ b/speedtests/CMakeLists.txt @@ -16,7 +16,7 @@ function(add_speedtest_executable TEST_NAME) endif() separate_arguments(MIOPEN_TEST_FLAGS_ARGS UNIX_COMMAND ${MIOPEN_TEST_FLAGS}) target_link_libraries(${TEST_NAME} MIOpen) - target_include_directories(${TEST_NAME} PRIVATE ../test) + target_include_directories(${TEST_NAME} PRIVATE ../test ../src/kernels) endfunction(add_speedtest_executable) foreach(TEST ${TESTS}) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2787733356..ace02095cc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -374,23 +374,27 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride1.inc kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride2.inc kernels/Conv_Winograd_v30_3_1_metadata.inc - kernels/xform_bidirect_winograd_code.inc - kernels/rocm_version.inc - kernels/inst_wrappers.inc + kernels/bfloat16_dev.hpp kernels/conv_common.inc - kernels/utilities.inc - kernels/xform_data_filter.inc - kernels/xform_kd_cov2.inc - kernels/xform_metadata.inc - kernels/neuron.inc kernels/conv_sizes.inc - kernels/gpr_alloc.inc - kernels/bfloat16_dev.hpp kernels/float_types.h - kernels/workaround_issue_1431.hpp + kernels/gpr_alloc.inc kernels/hip_f8_impl.hpp kernels/hip_float8.hpp + kernels/inst_wrappers.inc + kernels/miopen_cstdint.hpp + kernels/miopen_limits.hpp + kernels/miopen_type_traits.hpp + kernels/miopen_utility.hpp + kernels/neuron.inc + kernels/rocm_version.inc kernels/stride_array.hpp + kernels/utilities.inc + kernels/workaround_issue_1431.hpp + kernels/xform_bidirect_winograd_code.inc + kernels/xform_data_filter.inc + kernels/xform_kd_cov2.inc + kernels/xform_metadata.inc ) set(MIOPEN_KERNELS @@ -778,7 +782,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP") endif() if(ENABLE_HIP_WORKAROUNDS) # Workaround hip not setting its usage requirements correctly - target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 ) + target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 ) endif() # This is helpful for the tests target_link_libraries( MIOpen INTERFACE $ ) diff --git a/src/comgr.cpp b/src/comgr.cpp index 18f41c862d..0a38a31d05 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1292,15 +1292,20 @@ void BuildHip(const std::string& name, auto opts = miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit()); compiler::lc::RemoveOptionsUnwanted(opts); - opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073 - opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? - opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? +#if HIP_PACKAGE_VERSION_MAJOR < 6 + opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? +#endif + opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name())) opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1"); #endif opts.push_back("-DHIP_PACKAGE_VERSION_FLAT=" + std::to_string(HIP_PACKAGE_VERSION_FLAT)); - opts.push_back("-DMIOPEN_DONT_USE_HIP_RUNTIME_HEADERS=1"); + opts.push_back("-DMIOPEN_DONT_USE_HIP_RUNTIME_HEADERS"); + /// For now, use only standard to avoid possibility of + /// correctnes or performance regressions. + /// \todo Test and enable "custom" local implementation. + opts.push_back("-DWORKAROUND_DONT_USE_CUSTOM_LIMITS=1"); #if WORKAROUND_ISSUE_1431 if((StartsWith(target.Name(), "gfx10") || StartsWith(target.Name(), "gfx11")) && !miopen::comgr::IsWave64Enforced(opts)) diff --git a/src/composable_kernel/.clang-tidy b/src/composable_kernel/.clang-tidy index 5c2b781687..8d0880abcf 100644 --- a/src/composable_kernel/.clang-tidy +++ b/src/composable_kernel/.clang-tidy @@ -1,3 +1,3 @@ CheckOptions: - key: bugprone-reserved-identifier.AllowedIdentifiers - value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__' + value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__' diff --git a/src/composable_kernel/cmake/ClangTidy.cmake b/src/composable_kernel/cmake/ClangTidy.cmake index 01b348c458..7c17f91f1f 100644 --- a/src/composable_kernel/cmake/ClangTidy.cmake +++ b/src/composable_kernel/cmake/ClangTidy.cmake @@ -149,7 +149,7 @@ function(clang_tidy_check TARGET) add_custom_target(${tidy_target} # for some targets clang-tidy not able to get information from .clang-tidy DEPENDS ${SOURCE} - COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" + COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..." ) diff --git a/src/composable_kernel/composable_kernel/include/utility/array.hpp b/src/composable_kernel/composable_kernel/include/utility/array.hpp index 23623560f7..63a912015c 100644 --- a/src/composable_kernel/composable_kernel/include/utility/array.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/array.hpp @@ -4,34 +4,7 @@ #include "functional2.hpp" #include "sequence.hpp" -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// We need for std::forward. In some cases, it includes -/// (this is against the Standard, but it doesn't matter in this case). -/// But also defines std::true_type, per Standard. -/// However the latter definition conflicts with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, -/// which defines std::true_type as well (which is wrong). - -namespace std { - -template -constexpr T&& forward(typename remove_reference::type& t_) noexcept -{ - return static_cast(t_); -} - -template -constexpr T&& forward(typename remove_reference::type&& t_) noexcept -{ - return static_cast(t_); -} - -} // namespace std -#else -#include // std::forward -#endif -#endif // __HIPCC_RTC__ +#include "miopen_utility.hpp" // std::forward namespace ck { diff --git a/src/composable_kernel/composable_kernel/include/utility/config.hpp b/src/composable_kernel/composable_kernel/include/utility/config.hpp index 92307214f4..7869a075f2 100644 --- a/src/composable_kernel/composable_kernel/include/utility/config.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/config.hpp @@ -6,6 +6,7 @@ #include "hip/hip_fp16.h" #endif #include "bfloat16_dev.hpp" +#include "miopen_cstdint.hpp" // "Constant" address space for kernel parameter #define CONSTANT __attribute__((address_space(4))) diff --git a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp index 4d21f91e6a..01ae13a405 100644 --- a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp @@ -3,21 +3,8 @@ #include "statically_indexed_array.hpp" -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. - -typedef signed char int8_t; -typedef signed short int16_t; -typedef float float_t; -#include // std::numeric_limits - -#else -#include // int8_t, int16_t -#include // float_t -#endif -#endif // __HIPCC_RTC__ +#include "miopen_cstdint.hpp" +#include "miopen_limits.hpp" namespace ck { @@ -978,7 +965,7 @@ struct inner_product_with_conversion return acc; } - __device__ T operator()(float_t a, float_t b) const { return convert(a) * convert(b); } + __device__ T operator()(float a, float b) const { return convert(a) * convert(b); } __device__ T operator()(int8x4_t a, int8x4_t b) const { diff --git a/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp b/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp index 501e1bfc1c..30494214f8 100644 --- a/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp @@ -1,6 +1,8 @@ #ifndef CK_ENABLE_IF_HPP #define CK_ENABLE_IF_HPP +#include "miopen_type_traits.hpp" + namespace ck { template diff --git a/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp b/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp index b7489016e9..174c697501 100644 --- a/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp @@ -6,6 +6,7 @@ #include "number.hpp" #include "type.hpp" #include "tuple.hpp" +#include "miopen_cstdint.hpp" namespace ck { diff --git a/src/composable_kernel/composable_kernel/include/utility/type.hpp b/src/composable_kernel/composable_kernel/include/utility/type.hpp index 4e5d4e5134..17769d52c0 100644 --- a/src/composable_kernel/composable_kernel/include/utility/type.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/type.hpp @@ -3,84 +3,7 @@ #include "integral_constant.hpp" #include "enable_if.hpp" - -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// We need for std::remove_reference and std::remove_cv. -/// But also defines std::true_type, per Standard. -/// However the latter definition conflicts with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, -/// which defines std::true_type as well (which is wrong). - -namespace std { - -template -struct remove_reference -{ - typedef T type; -}; -template -struct remove_reference -{ - typedef T type; -}; -template -struct remove_reference -{ - typedef T type; -}; - -template -using remove_reference_t = typename remove_reference::type; - -template -struct remove_const -{ - typedef T type; -}; -template -struct remove_const -{ - typedef T type; -}; - -template -struct remove_volatile -{ - typedef T type; -}; -template -struct remove_volatile -{ - typedef T type; -}; - -template -struct remove_cv -{ - typedef typename remove_volatile::type>::type type; -}; - -template -struct is_pointer_helper : std::false_type -{ -}; - -template -struct is_pointer_helper : std::true_type -{ -}; - -template -struct is_pointer : is_pointer_helper::type> -{ -}; - -} // namespace std -#else -#include // std::remove_reference, std::remove_cv, is_pointer -#endif -#endif // __HIPCC_RTC__ +#include "miopen_type_traits.hpp" namespace ck { diff --git a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp index 52d00346cf..02e8da33a9 100644 --- a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp +++ b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp @@ -30,7 +30,7 @@ extern "C" { #endif -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define EXECUTION_SPECIFIER __device__ #else #define EXECUTION_SPECIFIER @@ -43,7 +43,7 @@ typedef union // Composable kernels are written in HIP language. The language doesnt support // ushort2.hi or ushort2.low. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ ushort ushortvec[2]; #endif // MIOPEN_BACKEND_HIP float f32; @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val) { cvt_bf16_fp32_t target_val; -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.ushortx2 = make_ushort2(0, src_val); #else target_val.ushortx2 = (ushort2)(0, src_val); @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, // incrementing it causes it to become an exponent of 0xFF and a mantissa // of 0x00, which is Inf, the next higher value to the unrounded value. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1)); #else target_val.u32 += @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) #endif // MIOPEN_USE_RNE_BFLOAT16 } -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ return target_val.ushortvec[1]; #else return target_val.ushortx2.hi; diff --git a/src/convolution.cpp b/src/convolution.cpp index ac7c28fdc4..3c8e36e43a 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& } } - std::size_t out_c; + std::size_t out_c = 0; std::vector out_lens(spatial_dim + 2); auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim); diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index fad06870ce..3d49d78bbb 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -104,8 +104,6 @@ static inline rocblas_computetype rocBlasComputeType_ex3(const miopen::GemmDescr static inline rocblas_datatype rocBlasComputeType(const miopen::GemmDescriptor& desc) { - // Complex compute types are only supported in newer version of the API - assert(desc.dataType == desc.a_cast_type && desc.dataType == desc.b_cast_type); if(desc.dataType == miopenInt8) return rocblas_datatype::rocblas_datatype_i32_r; else diff --git a/src/include/miopen/hip_f8_impl.hpp b/src/include/miopen/hip_f8_impl.hpp deleted file mode 120000 index 22052778a0..0000000000 --- a/src/include/miopen/hip_f8_impl.hpp +++ /dev/null @@ -1 +0,0 @@ -../../kernels/hip_f8_impl.hpp \ No newline at end of file diff --git a/src/include/miopen/hip_float8.hpp b/src/include/miopen/hip_float8.hpp deleted file mode 120000 index 5e16a70c91..0000000000 --- a/src/include/miopen/hip_float8.hpp +++ /dev/null @@ -1 +0,0 @@ -../../kernels/hip_float8.hpp \ No newline at end of file diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp index 827f4d1397..036da97728 100644 --- a/src/kernels/MIOpenCheckNumerics.cpp +++ b/src/kernels/MIOpenCheckNumerics.cpp @@ -30,51 +30,11 @@ #include -// Copied over from naive_conv.cpp -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. - -typedef unsigned char uint8_t; -typedef signed char int8_t; -typedef signed short int16_t; -typedef unsigned short uint16_t; -typedef float float_t; - -// std::conditional requires type_traits which has a few other things -// which result in collision with amd_hip_vector_types.h - -namespace std { -template -struct conditional; - -template -struct conditional -{ - using type = X; -}; - -template -struct conditional -{ - using type = Y; -}; - -template -using conditional_t = typename conditional::type; -} // namespace std -#else -#include // int8_t, int16_t -#include // float_t -#endif -#endif // __HIPCC_RTC__ - -#include // std::numeric_limits - #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" +#include "miopen_limits.hpp" + struct Numerics { float sum; diff --git a/src/kernels/MIOpenLayerNorm.cpp b/src/kernels/MIOpenLayerNorm.cpp index 58891d6538..bad09c563e 100644 --- a/src/kernels/MIOpenLayerNorm.cpp +++ b/src/kernels/MIOpenLayerNorm.cpp @@ -25,6 +25,12 @@ *******************************************************************************/ #ifdef MIOPEN_BETA_API +#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS +#include +#include +#endif + +#include "miopen_cstdint.hpp" #include "float_types.h" //#if MIOPEN_USE_BFP16 == 1 diff --git a/src/kernels/bfloat16_dev.hpp b/src/kernels/bfloat16_dev.hpp index c1a77c90db..2244385686 100644 --- a/src/kernels/bfloat16_dev.hpp +++ b/src/kernels/bfloat16_dev.hpp @@ -30,7 +30,7 @@ extern "C" { #endif -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define EXECUTION_SPECIFIER __device__ #else #define EXECUTION_SPECIFIER @@ -43,7 +43,7 @@ typedef union cvt_bf16_fp32 // Composable kernels are written in HIP language. The language doesnt support // ushort2.hi or ushort2.low. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ ushort ushortvec[2]; #endif // MIOPEN_BACKEND_HIP float f32; @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val) { cvt_bf16_fp32_t target_val; -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.ushortx2 = make_ushort2(0, src_val); #else target_val.ushortx2 = (ushort2)(0, src_val); @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, // incrementing it causes it to become an exponent of 0xFF and a mantissa // of 0x00, which is Inf, the next higher value to the unrounded value. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1)); #else target_val.u32 += @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) #endif // MIOPEN_USE_RNE_BFLOAT16 } -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ return target_val.ushortvec[1]; #else return target_val.ushortx2.hi; diff --git a/src/kernels/float_types.h b/src/kernels/float_types.h index 5406ba85ec..beded11d8d 100644 --- a/src/kernels/float_types.h +++ b/src/kernels/float_types.h @@ -34,7 +34,7 @@ #define FOUR 4 #define EIGHT 8 #if MIOPEN_USE_FP8 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT hip_f8 #define FLOAT_ACCUM float // HIP implements the correct operators for conversion @@ -58,7 +58,7 @@ #endif // MIOPEN_USE_FP8 #if MIOPEN_USE_BFP8 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT hip_f8 #define FLOAT_ACCUM float #else @@ -79,7 +79,7 @@ // #endif #endif // MIOPEN_USE_BFP8 -#ifndef __HIP_PLATFORM_HCC__ +#ifndef __HIP_PLATFORM_AMD__ #define _FLOAT2 PPCAT(_FLOAT, TWO) #define _FLOAT4 PPCAT(_FLOAT, FOUR) #define _FLOAT8 PPCAT(_FLOAT, EIGHT) @@ -99,19 +99,19 @@ #endif #if MIOPEN_USE_DOUBLE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT_ACCUM double #else #pragma OPENCL EXTENSION cl_khr_fp64 : enable #define _FLOAT_ACCUM double -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define MAX_VAL_ACCUM DBL_MAX #else // MIOPEN_USE_DOUBLE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT_ACCUM float #else #define _FLOAT_ACCUM float -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #ifndef FLT_MAX #define MAX_VAL_ACCUM 3.402823466e+38F #else @@ -120,12 +120,12 @@ #endif // MIOPEN_USE_DOUBLE_ACCUM #if MIOPEN_USE_FP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT _Float16 -#else // __HIP_PLATFORM_HCC__ +#else // __HIP_PLATFORM_AMD__ #pragma OPENCL EXTENSION cl_khr_fp16 : enable #define _FLOAT half -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define SIZEOF_FLOAT 2 // Max value for the main datatype #ifndef HALF_MAX @@ -136,11 +136,11 @@ #endif // MIOPEN_USE_FP16 #if MIOPEN_USE_FP32 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT float #else #define _FLOAT float -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define SIZEOF_FLOAT 4 // Max value for the main datatype #ifndef FLT_MAX @@ -151,7 +151,7 @@ #endif // MIOPEN_USE_FP32 #if MIOPEN_USE_BFP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT ushort #else #define _FLOAT ushort @@ -162,7 +162,7 @@ #endif // MIOPEN_USE_BFP16 #if MIOPEN_USE_FP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) (static_cast(x)) #define CVT_ACCUM2FLOAT(x) (static_cast(x)) #define CVT_INTEGRAL2ACCUM(x) (static_cast(x)) @@ -188,7 +188,7 @@ /// refactoring should be considered as nontrivial and requires /// a separate PR. Let's keep this historical stuff for now. /// --atamazov 30.08.2023 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) (static_cast(x)) #define CVT_ACCUM2FLOAT(x) (static_cast(x)) #define CVT_INTEGRAL2ACCUM(x) (static_cast(x)) @@ -202,7 +202,7 @@ #endif // MIOPEN_USE_FP32 #if MIOPEN_USE_BFP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED #define CVT_ACCUM2FLOAT(x) MIOPEN_ERROR_NOT_IMLEMENTED #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED @@ -232,7 +232,7 @@ #endif #if MIOPEN_USE_NATIVE_DATATYPE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #undef FLOAT_ACCUM #define FLOAT_ACCUM MIOPEN_ERROR_NOT_IMLEMENTED #else @@ -250,7 +250,7 @@ #define CVT_FP32_2ACCUM(x) (CVT_FP32_2FLOAT(x)) #undef CVT_INTEGRAL2ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED #else #if MIOPEN_USE_BFP16 == 1 diff --git a/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp b/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp index d1217c0f69..587b0b4191 100644 --- a/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp +++ b/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp @@ -28,6 +28,8 @@ #include #endif +#include "miopen_cstdint.hpp" + #ifndef BATCHED_TRANSPOSE_OCCUPANCY #define BATCHED_TRANSPOSE_OCCUPANCY 4 #endif diff --git a/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp b/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp index c88fa3cbc6..a6e917c855 100644 --- a/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp +++ b/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp @@ -24,34 +24,23 @@ * *******************************************************************************/ #ifndef GENERAL_TENSOR_REORDER_UTIL_HPP -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. - -typedef signed char int8_t; -typedef signed short int16_t; -typedef unsigned int uint32_t; - -#else -#include // int8_t, int16_t -#endif -#endif // __HIPCC_RTC__ +#define GENERAL_TENSOR_REORDER_UTIL_HPP #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS #include #include #endif +#include "miopen_cstdint.hpp" + #ifndef TENSOR_REORDER_OCCUPANCY #define TENSOR_REORDER_OCCUPANCY 4 #endif -#define GENERAL_TENSOR_REORDER_UTIL_HPP template struct order { - static constexpr std::size_t m_size = sizeof...(Is); + static constexpr size_t m_size = sizeof...(Is); // the last dummy element is to prevent compiler complain about empty array, when mSize = 0 static constexpr int m_data[m_size + 1] = {Is..., 0}; diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp index 3b4eabecfb..724ef3d5af 100644 --- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp @@ -30,47 +30,9 @@ #include #endif -// Copied over from naive_conv.cpp -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. - -typedef unsigned char uint8_t; -typedef signed char int8_t; -typedef signed short int16_t; -typedef unsigned short uint16_t; -typedef float float_t; - -// std::conditional requires type_traits which has a few other things -// which result in collition with amd_hip_vector_types.h - -namespace std { -template -struct conditional; - -template -struct conditional -{ - using type = X; -}; - -template -struct conditional -{ - using type = Y; -}; - -template -using conditional_t = typename conditional::type; -} // namespace std -#else -#include // int8_t, int16_t -#include // float_t -#endif -#endif // __HIPCC_RTC__ - -#include // std::numeric_limits +#include "miopen_cstdint.hpp" +#include "miopen_type_traits.hpp" +#include "miopen_limits.hpp" #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" diff --git a/src/kernels/gpu_reference_kernel/naive_conv.cpp b/src/kernels/gpu_reference_kernel/naive_conv.cpp index 125eff94f3..d9a6c133d3 100644 --- a/src/kernels/gpu_reference_kernel/naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/naive_conv.cpp @@ -28,23 +28,8 @@ #include #endif -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. - -typedef signed char int8_t; -typedef signed short int16_t; -typedef float float_t; -#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS -#include // std::numeric_limits -#endif - -#else -#include // int8_t, int16_t -#include // float_t -#endif -#endif // __HIPCC_RTC__ +#include "miopen_cstdint.hpp" +#include "miopen_limits.hpp" #include "stride_array.hpp" diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp index c7a62f9f72..9cb63ec0c6 100644 --- a/src/kernels/hip_f8_impl.hpp +++ b/src/kernels/hip_f8_impl.hpp @@ -25,9 +25,13 @@ *******************************************************************************/ // #include // #include + +#include "miopen_cstdint.hpp" +#include "miopen_type_traits.hpp" + namespace miopen_hip_f8_impl { -#ifndef __HIP_PLATFORM_HCC__ +#ifndef __HIP_PLATFORM_AMD__ using hip_bfloat16 = bfloat16; using half = half_float::half; #endif @@ -87,8 +91,8 @@ MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8_no_range_reduce(T _x, template MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch, uint32_t rng) { - constexpr bool is_half = std::is_same::value; - constexpr bool is_float = std::is_same::value; + constexpr bool is_half = __is_same_as(T, half); + constexpr bool is_float = __is_same_as(T, float); static_assert(wm + we == 7, "wm+we==7"); static_assert(is_half || is_float, "Only half and float can be cast to f8"); @@ -272,8 +276,8 @@ MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch, uint32_t rng) template MIOPEN_HIP_HOST_DEVICE T cast_from_f8(uint8_t x) { - constexpr bool is_half = std::is_same::value; - constexpr bool is_float = std::is_same::value; + constexpr bool is_half = __is_same_as(T, half); + constexpr bool is_float = __is_same_as(T, float); static_assert(is_half || is_float, "only half and float are supported"); constexpr int weo = is_half ? 5 : 8; diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index dd57c9ca5b..e45c616d31 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -24,12 +24,16 @@ * *******************************************************************************/ #pragma once + +#include "miopen_cstdint.hpp" + #ifndef MIOPEN_ENABLE_F8_DEVICE_CODE #define MIOPEN_ENABLE_F8_DEVICE_CODE 0 #endif // FP8 header version 0.4, 2021/05/11 -#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE +// Updated by atamazov 2023/12/22 +#if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE // MIOpen by default does not have device code in the regular compilation paths, // therefore, when this file is used from the host side, compilation takes much // longer. By guarding the __device__ directive we can control that such compilation @@ -83,6 +87,9 @@ inline MIOPEN_HIP_HOST_DEVICE bool get_hip_f8_bias_mode() #endif } +template +class numeric_limits; + template struct hip_f8 { @@ -262,8 +269,7 @@ struct hip_f8 inline MIOPEN_HIP_HOST_DEVICE bool operator==(const hip_f8& rhs) const { - if((rhs.is_zero() && this->is_zero()) || - (fabs(rhs - *this) < std::numeric_limits>::epsilon())) + if((rhs.is_zero() && this->is_zero()) || (this->data == rhs.data)) return true; else if(rhs.is_nan() || rhs.is_inf() || this->is_nan() || this->is_inf()) return false; @@ -483,19 +489,6 @@ MIOPEN_HIP_HOST_DEVICE T F8_Max() x.bits = 0x7F; return x.value; } -} // namespace miopen_f8 - -// define numeric limits for the new data type -namespace std { -inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT -{ - return x.is_inf(); -} - -inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT -{ - return x.is_inf(); -} template <> class numeric_limits> @@ -517,11 +510,14 @@ class numeric_limits> return miopen_f8::F8_Max>(); } + /// \todo This is wrong. min() should minimum normalized positive value. static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 min() { return static_cast>(-1.0f) * miopen_f8::F8_Max>(); } + + static constexpr int digits = 4; }; template <> @@ -544,14 +540,69 @@ class numeric_limits> return static_cast>( miopen_f8::F8_Max>()); } + + /// \todo This is wrong. min() should minimum normalized positive value. static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 min() { return static_cast>(-1.0f) * miopen_f8::F8_Max>(); } + + static constexpr int digits = 3; +}; + +} // namespace miopen_f8 + +#ifdef __HIPCC_RTC__ +// Assume that if hipRTC is used, then we get for F8 +// from the precompiled header. +#else +// NOLINTBEGIN(cert-dcl58-cpp) +namespace std { +inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT +{ + return x.is_inf(); +} + +inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT +{ + return x.is_inf(); +} + +inline bool isnan(miopen_f8::hip_f8 x) // NOLINT +{ + return x.is_nan(); +} + +inline bool isnan(miopen_f8::hip_f8 x) // NOLINT +{ + return x.is_nan(); +} + +} // namespace std + // NOLINTEND(cert-dcl58-cpp) +#endif + +// NOLINTBEGIN(cert-dcl58-cpp) +namespace std { + +template +class numeric_limits; + +template <> +class numeric_limits> + : public miopen_f8::numeric_limits> +{ +}; + +template <> +class numeric_limits> + : public miopen_f8::numeric_limits> +{ }; } // namespace std +// NOLINTEND(cert-dcl58-cpp) template struct hip_f8x4 diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp new file mode 100644 index 0000000000..752805244b --- /dev/null +++ b/src/kernels/miopen_cstdint.hpp @@ -0,0 +1,41 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef signed short int16_t; +typedef unsigned short uint16_t; +#if HIP_PACKAGE_VERSION_FLAT >= 6000024000ULL +typedef signed int int32_t; +typedef unsigned int uint32_t; +typedef __hip_internal::uint64_t uint64_t; +#endif + +#else +#include // int8_t, int16_t +#endif diff --git a/src/kernels/miopen_limits.hpp b/src/kernels/miopen_limits.hpp new file mode 100644 index 0000000000..2a8f5e6178 --- /dev/null +++ b/src/kernels/miopen_limits.hpp @@ -0,0 +1,98 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#ifndef WORKAROUND_DO_NOT_USE_CUSTOM_LIMITS +#define WORKAROUND_DO_NOT_USE_CUSTOM_LIMITS 0 +#endif + +#if defined(MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS) && !WORKAROUND_DONT_USE_CUSTOM_LIMITS + +#include + +#define MIOPEN_ENABLE_F8_DEVICE_CODE 1 +#include "hip_float8.hpp" + +namespace std { + +template +class numeric_limits; + +template <> +class numeric_limits +{ +public: + static constexpr __device__ float max() noexcept { return 0x1.FFFFFEp+127f; } + + static constexpr __device__ float min() noexcept { return 0x1p-126f; } +}; + +template <> +class numeric_limits<_Float16> +{ +public: + static constexpr __device__ _Float16 max() noexcept + { + return static_cast<_Float16>(0x1.FFCp+15f); + } + + static constexpr __device__ _Float16 min() noexcept { return static_cast<_Float16>(0x1p-14f); } +}; + +template <> +class numeric_limits +{ +public: + static +#if HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL + constexpr +#endif + __device__ hip_bfloat16 + max() noexcept + { + // data = 0x7F7F + return static_cast(0x1.FEp+127f); + } + + static +#if HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL + constexpr +#endif + __device__ hip_bfloat16 + min() noexcept + { + // data = 0x0080 + return static_cast(0x1p-14f); + } +}; + +} // namespace std + +#else + +#include + +#endif diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp new file mode 100644 index 0000000000..b9cf11cd63 --- /dev/null +++ b/src/kernels/miopen_type_traits.hpp @@ -0,0 +1,148 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS + +namespace std { + +template +struct remove_reference +{ + typedef T type; +}; +template +struct remove_reference +{ + typedef T type; +}; +template +struct remove_reference +{ + typedef T type; +}; + +template +using remove_reference_t = typename remove_reference::type; + +template +struct remove_const +{ + typedef T type; +}; +template +struct remove_const +{ + typedef T type; +}; + +template +struct remove_volatile +{ + typedef T type; +}; +template +struct remove_volatile +{ + typedef T type; +}; + +template +struct remove_cv +{ + typedef typename remove_volatile::type>::type type; +}; + +#if HIP_PACKAGE_VERSION_FLAT >= 6000024000ULL +template +struct integral_constant +{ + static constexpr T value = v; + using value_type = T; + using type = integral_constant; + constexpr operator value_type() const noexcept { return value; } + constexpr value_type operator()() const noexcept { return value; } +}; + +using true_type = integral_constant; +using false_type = integral_constant; + +template +struct is_same : false_type +{ +}; + +template +struct is_same : true_type +{ +}; + +template +using enable_if = __hip_internal::enable_if; + +template +using enable_if_t = typename __hip_internal::enable_if::type; +#endif + +template +struct is_pointer_helper : false_type +{ +}; + +template +struct is_pointer_helper : true_type +{ +}; + +template +struct is_pointer : is_pointer_helper::type> +{ +}; + +template +struct conditional; + +template +struct conditional +{ + using type = X; +}; + +template +struct conditional +{ + using type = Y; +}; + +template +using conditional_t = typename conditional::type; + +} // namespace std +#else + +#include // std::remove_reference, std::remove_cv, is_pointer + +#endif diff --git a/src/kernels/miopen_utility.hpp b/src/kernels/miopen_utility.hpp new file mode 100644 index 0000000000..584ffad278 --- /dev/null +++ b/src/kernels/miopen_utility.hpp @@ -0,0 +1,52 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS + +#include "miopen_type_traits.hpp" // std::remove_reference + +namespace std { + +template +constexpr T&& forward(typename remove_reference::type& t_) noexcept +{ + return static_cast(t_); +} + +template +constexpr T&& forward(typename remove_reference::type&& t_) noexcept +{ + return static_cast(t_); +} + +} // namespace std + +#else + +#include + +#endif diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp index 19cf75624b..88036f8f4d 100644 --- a/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp +++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp @@ -2,66 +2,7 @@ #define CK_UTILS_TYPE_HPP #include "static_kernel_integral_constant.hpp" - -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// We need for std::remove_reference and std::remove_cv. -/// But also defines std::true_type, per Standard. -/// However the latter definition conflicts with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, -/// which defines std::true_type as well (which is wrong). - -namespace std { - -template -struct remove_reference -{ - typedef T type; -}; -template -struct remove_reference -{ - typedef T type; -}; -template -struct remove_reference -{ - typedef T type; -}; - -template -struct remove_const -{ - typedef T type; -}; -template -struct remove_const -{ - typedef T type; -}; - -template -struct remove_volatile -{ - typedef T type; -}; -template -struct remove_volatile -{ - typedef T type; -}; - -template -struct remove_cv -{ - typedef typename remove_volatile::type>::type type; -}; - -} // namespace std -#else -#include // std::remove_reference, std::remove_cv -#endif -#endif // __HIPCC_RTC__ +#include "miopen_type_traits.hpp" namespace ck { diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_config.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_config.hpp index b4c9c2806d..d1a883b2ca 100644 --- a/src/kernels/static_composable_kernel/include/utility/static_kernel_config.hpp +++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_config.hpp @@ -5,6 +5,7 @@ #include "hip/hip_runtime.h" #include "hip/hip_fp16.h" #endif +#include "miopen_cstdint.hpp" #include "bfloat16_dev.hpp" // index type: unsigned or signed diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp index e996e7a8b0..87e830954c 100644 --- a/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp +++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp @@ -26,9 +26,7 @@ #ifndef CK_REDUCTION_OPERATOR_HPP #define CK_REDUCTION_OPERATOR_HPP -#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS -#include -#endif +#include "miopen_limits.hpp" #include "static_kernel_reduction_common.hpp" namespace ck { diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp index 1118550815..343b9d388d 100644 --- a/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp +++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp @@ -5,34 +5,7 @@ #include "static_kernel_ck_utils_type.hpp" #include "static_kernel_sequence.hpp" -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// We need for std::forward. In some cases, it includes -/// (this is against the Standard, but it doesn't matter in this case). -/// But also defines std::true_type, per Standard. -/// However the latter definition conflicts with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, -/// which defines std::true_type as well (which is wrong). - -namespace std { - -template -constexpr T&& forward(typename remove_reference::type& t_) noexcept -{ - return static_cast(t_); -} - -template -constexpr T&& forward(typename remove_reference::type&& t_) noexcept -{ - return static_cast(t_); -} - -} // namespace std -#else -#include // std::forward -#endif -#endif // __HIPCC_RTC__ +#include "miopen_utility.hpp" // std::forward namespace ck { diff --git a/src/kernels/stride_array.hpp b/src/kernels/stride_array.hpp index 32cb1f85b6..95c86fc79e 100644 --- a/src/kernels/stride_array.hpp +++ b/src/kernels/stride_array.hpp @@ -25,12 +25,6 @@ *******************************************************************************/ #pragma once -#ifdef __HIPCC_RTC__ -#ifndef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -#include -#endif -#endif // __HIPCC_RTC__ - /// \todo Uncomment when hip RTC accepts std::array -- amberhassaan // #include // using StrideIndexType = int; diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp index 8dce8f364e..e04dc5c2ea 100644 --- a/src/rnn_api.cpp +++ b/src/rnn_api.cpp @@ -529,7 +529,7 @@ static void LogCmdRNN(const miopenTensorDescriptor_t* xDesc, const int seqLength, const RNNDir_t dir) { - if(miopen::IsLoggingCmd()) + if(miopen::IsLoggingCmd() && seqLength > 0) { std::string mode; miopenRNNMode_t rnnMode = miopen::deref(rnnDesc).rnnMode; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 030adb196d..44f6462a61 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -175,6 +175,10 @@ message(STATUS "MIOPEN_TEST_GPU_XNACK_ENABLED ${MIOPEN_TEST_GPU_XNACK_ENABLED}") message(STATUS "MIOPEN_TEST_GPU_DETECTION_FAILED ${MIOPEN_TEST_GPU_DETECTION_FAILED}") message(STATUS "MIOPEN_TEST_WITH_MIOPENDRIVER ${MIOPEN_TEST_WITH_MIOPENDRIVER}") message(STATUS "MIOPEN_TEST_MLIR ${MIOPEN_TEST_MLIR}") +message(STATUS "MIOPEN_TEST_CONV ${MIOPEN_TEST_CONV}") +message(STATUS "MIOPEN_TEST_DEEPBENCH ${MIOPEN_TEST_DEEPBENCH}") +message(STATUS "MIOPEN_TEST_DRIVER_ITER_MODE ${MIOPEN_TEST_DRIVER_ITER_MODE}") +message(STATUS "MIOPEN_TEST_COMPOSABLEKERNEL ${MIOPEN_TEST_COMPOSABLEKERNEL}") if(MIOPEN_TEST_DRIVER_ITER_MODE) add_definitions(-DMIOPEN_TEST_DRIVER_MODE=2) @@ -230,6 +234,7 @@ message(STATUS "MIOPEN_TEST_FLOAT ${MIOPEN_TEST_FLOAT}") message(STATUS "MIOPEN_TEST_HALF ${MIOPEN_TEST_HALF}") message(STATUS "MIOPEN_TEST_BFLOAT16 ${MIOPEN_TEST_BFLOAT16}") message(STATUS "MIOPEN_TEST_INT8 ${MIOPEN_TEST_INT8}") +message(STATUS "MIOPEN_TEST_ALL ${MIOPEN_TEST_ALL}") set_var_to_condition(WORKAROUND_ISSUE_1187_DEFAULT MIOPEN_TEST_GFX90A AND MIOPEN_TEST_FLOAT) option( WORKAROUND_ISSUE_1187 "" ${WORKAROUND_ISSUE_1187_DEFAULT}) @@ -372,9 +377,10 @@ function(add_test_executable TEST_NAME) endif() if(NOT MIOPEN_EMBED_DB STREQUAL "") target_link_libraries(${TEST_NAME} MIOpen miopen_data) - else() + else() target_link_libraries(${TEST_NAME} MIOpen) endif() + target_include_directories(${TEST_NAME} PRIVATE ../src/kernels) endfunction(add_test_executable) set(MIOPEN_TEST_SANITIZERS) diff --git a/test/cpu_conv.hpp b/test/cpu_conv.hpp index 560c867c6b..f9001ef667 100644 --- a/test/cpu_conv.hpp +++ b/test/cpu_conv.hpp @@ -39,7 +39,7 @@ #include "tensor_holder.hpp" #include #include -#include +#include template static constexpr auto make_array(T x, Ts... xs) diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt index 4b7f2aaf08..1a15935192 100644 --- a/test/gtest/CMakeLists.txt +++ b/test/gtest/CMakeLists.txt @@ -24,7 +24,7 @@ function(add_gtest TEST_NAME) add_dependencies(tests test_${TEST_NAME}) add_dependencies(check test_${TEST_NAME}) target_compile_options(test_${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef) - target_include_directories(test_${TEST_NAME} PRIVATE ../) + target_include_directories(test_${TEST_NAME} PRIVATE ../ ../../src/kernels) if(MIOPEN_ENABLE_AI_KERNEL_TUNING) target_include_directories(test_${TEST_NAME} SYSTEM PRIVATE $) target_include_directories(test_${TEST_NAME} SYSTEM PRIVATE $) @@ -35,7 +35,8 @@ function(add_gtest TEST_NAME) target_link_libraries(test_${TEST_NAME} gtest_main MIOpen ${Boost_LIBRARIES} hip::host $) endif() # Enable CMake to discover the test binary - gtest_discover_tests(test_${TEST_NAME} PROPERTIES ENVIRONMENT "MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR};MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG};MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL};MIOPEN_TEST_MLIR=${MIOPEN_TEST_MLIR};MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL}") + # Extend GTest DISCOVERY_TIMEOUT to 5 mins + gtest_discover_tests(test_${TEST_NAME} DISCOVERY_TIMEOUT 300 PROPERTIES ENVIRONMENT "MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR};MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG};MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL};MIOPEN_TEST_MLIR=${MIOPEN_TEST_MLIR};MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL}") endif() endfunction() @@ -50,3 +51,9 @@ foreach(TEST ${TESTS}) get_filename_component(BASE_NAME ${TEST} NAME_WE) add_gtest(${BASE_NAME}) endforeach() + +message(STATUS "gtest env: MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR}") +message(STATUS "gtest env: MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG}") +message(STATUS "gtest env: MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL}") +message(STATUS "gtest env: MIOPEN_TEST_MLIR=${MIOPEN_TEST_MLIR}") +message(STATUS "gtest env: MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL}") diff --git a/test/gtest/solver_bwd.hpp b/test/gtest/solver_bwd.hpp index febc35ae01..d99d149a94 100644 --- a/test/gtest/solver_bwd.hpp +++ b/test/gtest/solver_bwd.hpp @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include diff --git a/test/gtest/solver_fwd.hpp b/test/gtest/solver_fwd.hpp index a41667d5fc..a78f65dc04 100644 --- a/test/gtest/solver_fwd.hpp +++ b/test/gtest/solver_fwd.hpp @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include diff --git a/test/gtest/solver_wrw.hpp b/test/gtest/solver_wrw.hpp index 6adba65bf6..a3268dbe2e 100644 --- a/test/gtest/solver_wrw.hpp +++ b/test/gtest/solver_wrw.hpp @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include diff --git a/test/handle_test.cpp b/test/handle_test.cpp index 1ab319d6fb..4abc08edcc 100644 --- a/test/handle_test.cpp +++ b/test/handle_test.cpp @@ -28,6 +28,14 @@ /// \todo Create dedicated ticket and rename macro. #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1 +// https://gerrit-git.amd.com/c/compute/ec/clr/+/972441 +// "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not +// yet fixed in the compiler. In order to test such release candidates, we have to +// override HIP version to 6.1.0. +#define WORKAROUND_ISSUE_2600 \ + ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT <= 6000999999ULL) || \ + HIP_PACKAGE_VERSION_FLAT == 6001000000ULL) + #include #include #include @@ -207,7 +215,7 @@ std::string WriteNop(kernel_type_t kern_type) void test_warnings(kernel_type_t kern_type) { auto&& h = get_handle(); -#if MIOPEN_BUILD_DEV +#if MIOPEN_BUILD_DEV && !WORKAROUND_ISSUE_2600 if(kern_type == miopenOpenCLKernelType) EXPECT(throws([&] { h.AddKernel("GEMM", "", WriteNop(kern_type), "write", {1, 1, 1}, {1, 1, 1}, ""); diff --git a/test/na_train.cpp b/test/na_train.cpp index 24529058ed..80f3fc781f 100644 --- a/test/na_train.cpp +++ b/test/na_train.cpp @@ -802,7 +802,7 @@ struct na_fusion_driver : test_driver std::size_t input_n, input_c, input_h, input_w; std::tie(input_n, input_c, input_h, input_w) = miopen::tien<4>(input.desc.GetLengths()); - this->tolerance = 80 * float(input.desc.GetElementSize()); + this->tolerance = 80 * double(input.desc.GetElementSize()); ptr_activdesc = GetManagedActivDesc(); miopenSetActivationDescriptor(ptr_activdesc.get(), activ_mode, alpha, beta, gamma); auto&& handle = get_handle(); diff --git a/test/tensor_holder.hpp b/test/tensor_holder.hpp index 3fda3b5cf7..7b013aa240 100644 --- a/test/tensor_holder.hpp +++ b/test/tensor_holder.hpp @@ -44,7 +44,7 @@ #endif using half = half_float::half; using hip_bfloat16 = bfloat16; -#include +#include using float8 = miopen_f8::hip_f8; using bfloat8 = miopen_f8::hip_f8; diff --git a/test/verify.hpp b/test/verify.hpp index 984237a48c..339eeeee76 100644 --- a/test/verify.hpp +++ b/test/verify.hpp @@ -36,7 +36,7 @@ #include using half = half_float::half; using hip_bfloat16 = bfloat16; -#include +#include #include "tensor_holder.hpp" namespace miopen {