diff --git a/CMakeLists.txt b/CMakeLists.txt
index 46cf416782..72ef595742 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -99,7 +99,7 @@ if(NOT WIN32 AND NOT APPLE)
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
 endif()
 
-rocm_setup_version(VERSION 3.00.0)
+rocm_setup_version(VERSION 3.0.1)
 
 list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake )
 include(TargetFlags)
@@ -380,7 +380,7 @@ if(MIOPEN_USE_HIPRTC)
         message(FATAL_ERROR "HIPRTC can be used only together with COMGR")
     endif()
     find_package(hiprtc REQUIRED)
-    message(STATUS "Build with HIPRTC")
+    message(STATUS "Build with HIPRTC ${hiprtc_VERSION}")
 endif()
 
 option(Boost_USE_STATIC_LIBS "Use boost static libraries" ON)
@@ -761,6 +761,19 @@ enable_cppcheck(
         knownConditionTrueFalse
         shadowFunction
         moduloofone
+        ###################################################################
+        # TODO Code Quality WORKAROUND ROCm 6.0 &&
+        # Ubuntu 22.04 && cppcheck 2.12.1 update
+        ###################################################################
+        duplInheritedMember
+        constParameterCallback
+        constParameterReference
+        constParameterPointer
+        constVariableReference
+        constVariablePointer
+        useStlAlgorithm
+        uselessOverride
+        unusedScopedObject   
     FORCE
     SOURCES
         addkernels/
diff --git a/Dockerfile b/Dockerfile
index d958879d3c..36e4dc484e 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:20.04 as miopen
+FROM ubuntu:22.04 as miopen
 ARG DEBIAN_FRONTEND=noninteractive
 
 # Support multiarch
@@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
 ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
 RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg
 
-RUN wget https://repo.radeon.com/amdgpu-install/5.7/ubuntu/focal/amdgpu-install_5.7.50700-1_all.deb --no-check-certificate
+RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
 RUN apt-get update && \
 DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
-    ./amdgpu-install_5.7.50700-1_all.deb
+    ./amdgpu-install_6.0.60000-1_all.deb
 
 # Add rocm repository
-RUN export ROCM_APT_VER=5.7;\
+RUN export ROCM_APT_VER=6.0;\
 echo $ROCM_APT_VER &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list'
-RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
+RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list"
 
 RUN amdgpu-install -y --usecase=rocm --no-dkms
 
@@ -94,11 +94,17 @@ RUN rm -rf /tmp/ccache* && mkdir /tmp/ccache && wget https://github.com/ccache/c
     cd /tmp/ccache-${CCACHE_COMMIT}/build && \
     cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/*
 RUN ccache -s 
+
+# purge existing composable kernel installed with ROCm
+# hence cannot use autoremove since it will remove more components
+RUN apt-get update && \
+DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \
+    composablekernel-dev
 ARG COMPILER_LAUNCHER=""
 RUN if [ "$USE_FIN" = "ON" ]; then \
-        rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
+        rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
     else \
-        rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
+        rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
     fi
 
 RUN ccache -s 
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 86ee06ae33..ddc6212455 100755
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,3 +1,3 @@
 ROCmSoftwarePlatform/rocm-recipes
 -f requirements.txt
-danmar/cppcheck@2.9
+danmar/cppcheck@2.12.1
diff --git a/docs/DebugAndLogging.md b/docs/DebugAndLogging.md
index 3ae5db123a..b1e497efcc 100644
--- a/docs/DebugAndLogging.md
+++ b/docs/DebugAndLogging.md
@@ -94,7 +94,7 @@ Direct Solutions:
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`.
-* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`.
+* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2<n>` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1`
diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt
index 2918a5aa94..64ac494812 100644
--- a/driver/CMakeLists.txt
+++ b/driver/CMakeLists.txt
@@ -28,6 +28,7 @@ cmake_minimum_required( VERSION 3.5)
 find_package(Threads REQUIRED)
 
 add_executable(MIOpenDriver main.cpp InputFlags.cpp)
+target_include_directories(MIOpenDriver PRIVATE ../src/kernels)
 target_link_libraries(MIOpenDriver MIOpen)
 target_link_libraries(MIOpenDriver ${CMAKE_THREAD_LIBS_INIT})
 if(NOT MIOPEN_EMBED_DB STREQUAL "")
diff --git a/driver/driver.hpp b/driver/driver.hpp
index 0862652cd5..7a1503ed74 100644
--- a/driver/driver.hpp
+++ b/driver/driver.hpp
@@ -44,7 +44,7 @@
 #include <miopen/bfloat16.hpp>
 using half         = half_float::half;
 using hip_bfloat16 = bfloat16;
-#include <miopen/hip_float8.hpp>
+#include <hip_float8.hpp>
 using float16 = half_float::half;
 using float8  = miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>;
 using bfloat8 = miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>;
diff --git a/driver/random.hpp b/driver/random.hpp
index b3be81f56e..a55b73cc96 100644
--- a/driver/random.hpp
+++ b/driver/random.hpp
@@ -109,7 +109,9 @@ inline T gen_subnorm()
     if constexpr(!std::is_integral_v<T> && !std::is_same_v<T, double> &&
                  details::has_digits<T>::value)
     {
-        using BitType = std::conditional_t<sizeof(T) == 2, uint16_t, uint32_t>;
+        using BitType = std::conditional_t<sizeof(T) == 1,
+                                           uint8_t,
+                                           std::conditional_t<sizeof(T) == 2, uint16_t, uint32_t>>;
         static_assert(sizeof(T) == sizeof(BitType));
 
         // -1 because ::digits counts the first implicit digit
diff --git a/speedtests/CMakeLists.txt b/speedtests/CMakeLists.txt
index 5832f43751..6f8086880a 100644
--- a/speedtests/CMakeLists.txt
+++ b/speedtests/CMakeLists.txt
@@ -16,7 +16,7 @@ function(add_speedtest_executable TEST_NAME)
     endif()
     separate_arguments(MIOPEN_TEST_FLAGS_ARGS UNIX_COMMAND ${MIOPEN_TEST_FLAGS})
     target_link_libraries(${TEST_NAME} MIOpen)
-    target_include_directories(${TEST_NAME} PRIVATE ../test)
+    target_include_directories(${TEST_NAME} PRIVATE ../test ../src/kernels)
 endfunction(add_speedtest_executable)
 
 foreach(TEST ${TESTS})
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2787733356..ace02095cc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -374,23 +374,27 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
         kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride1.inc
         kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride2.inc
         kernels/Conv_Winograd_v30_3_1_metadata.inc
-        kernels/xform_bidirect_winograd_code.inc
-        kernels/rocm_version.inc
-        kernels/inst_wrappers.inc
+        kernels/bfloat16_dev.hpp
         kernels/conv_common.inc
-        kernels/utilities.inc
-        kernels/xform_data_filter.inc
-        kernels/xform_kd_cov2.inc
-        kernels/xform_metadata.inc
-        kernels/neuron.inc
         kernels/conv_sizes.inc
-        kernels/gpr_alloc.inc
-        kernels/bfloat16_dev.hpp
         kernels/float_types.h
-        kernels/workaround_issue_1431.hpp
+        kernels/gpr_alloc.inc
         kernels/hip_f8_impl.hpp
         kernels/hip_float8.hpp
+        kernels/inst_wrappers.inc
+        kernels/miopen_cstdint.hpp
+        kernels/miopen_limits.hpp
+        kernels/miopen_type_traits.hpp
+        kernels/miopen_utility.hpp
+        kernels/neuron.inc
+        kernels/rocm_version.inc
         kernels/stride_array.hpp
+        kernels/utilities.inc
+        kernels/workaround_issue_1431.hpp
+        kernels/xform_bidirect_winograd_code.inc
+        kernels/xform_data_filter.inc
+        kernels/xform_kd_cov2.inc
+        kernels/xform_metadata.inc
         )
 
     set(MIOPEN_KERNELS
@@ -778,7 +782,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP")
     endif()
     if(ENABLE_HIP_WORKAROUNDS)
         # Workaround hip not setting its usage requirements correctly
-        target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 )
+        target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 )
     endif()
     # This is helpful for the tests
     target_link_libraries( MIOpen INTERFACE $<BUILD_INTERFACE:hip::device> )
diff --git a/src/comgr.cpp b/src/comgr.cpp
index 18f41c862d..0a38a31d05 100644
--- a/src/comgr.cpp
+++ b/src/comgr.cpp
@@ -1292,15 +1292,20 @@ void BuildHip(const std::string& name,
         auto opts =
             miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
         compiler::lc::RemoveOptionsUnwanted(opts);
-        opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073
-        opts.push_back("-D__HIP_PLATFORM_HCC__=1");            // Workaround?
-        opts.push_back("-D__HIP_PLATFORM_AMD__=1");            // Workaround?
+#if HIP_PACKAGE_VERSION_MAJOR < 6
+        opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
+#endif
+        opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
 #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT
         if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name()))
             opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1");
 #endif
         opts.push_back("-DHIP_PACKAGE_VERSION_FLAT=" + std::to_string(HIP_PACKAGE_VERSION_FLAT));
-        opts.push_back("-DMIOPEN_DONT_USE_HIP_RUNTIME_HEADERS=1");
+        opts.push_back("-DMIOPEN_DONT_USE_HIP_RUNTIME_HEADERS");
+        /// For now, use only standard <limits> to avoid possibility of
+        /// correctnes or performance regressions.
+        /// \todo Test and enable "custom" local implementation.
+        opts.push_back("-DWORKAROUND_DONT_USE_CUSTOM_LIMITS=1");
 #if WORKAROUND_ISSUE_1431
         if((StartsWith(target.Name(), "gfx10") || StartsWith(target.Name(), "gfx11")) &&
            !miopen::comgr::IsWave64Enforced(opts))
diff --git a/src/composable_kernel/.clang-tidy b/src/composable_kernel/.clang-tidy
index 5c2b781687..8d0880abcf 100644
--- a/src/composable_kernel/.clang-tidy
+++ b/src/composable_kernel/.clang-tidy
@@ -1,3 +1,3 @@
 CheckOptions:
   - key: bugprone-reserved-identifier.AllowedIdentifiers
-    value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__'
+    value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__'
diff --git a/src/composable_kernel/cmake/ClangTidy.cmake b/src/composable_kernel/cmake/ClangTidy.cmake
index 01b348c458..7c17f91f1f 100644
--- a/src/composable_kernel/cmake/ClangTidy.cmake
+++ b/src/composable_kernel/cmake/ClangTidy.cmake
@@ -149,7 +149,7 @@ function(clang_tidy_check TARGET)
             add_custom_target(${tidy_target}
                 # for some targets clang-tidy not able to get information from .clang-tidy
                 DEPENDS ${SOURCE}
-                COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
+                COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
                 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                 COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..."
             )
diff --git a/src/composable_kernel/composable_kernel/include/utility/array.hpp b/src/composable_kernel/composable_kernel/include/utility/array.hpp
index 23623560f7..63a912015c 100644
--- a/src/composable_kernel/composable_kernel/include/utility/array.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/array.hpp
@@ -4,34 +4,7 @@
 #include "functional2.hpp"
 #include "sequence.hpp"
 
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// We need <utility> for std::forward. In some cases, it includes <type_traits>
-/// (this is against the Standard, but it doesn't matter in this case).
-/// But <type_traits> also defines std::true_type, per Standard.
-/// However the latter definition conflicts with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
-/// which defines std::true_type as well (which is wrong).
-
-namespace std {
-
-template <typename T>
-constexpr T&& forward(typename remove_reference<T>::type& t_) noexcept
-{
-    return static_cast<T&&>(t_);
-}
-
-template <typename T>
-constexpr T&& forward(typename remove_reference<T>::type&& t_) noexcept
-{
-    return static_cast<T&&>(t_);
-}
-
-} // namespace std
-#else
-#include <utility> // std::forward
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_utility.hpp" // std::forward
 
 namespace ck {
 
diff --git a/src/composable_kernel/composable_kernel/include/utility/config.hpp b/src/composable_kernel/composable_kernel/include/utility/config.hpp
index 92307214f4..7869a075f2 100644
--- a/src/composable_kernel/composable_kernel/include/utility/config.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/config.hpp
@@ -6,6 +6,7 @@
 #include "hip/hip_fp16.h"
 #endif
 #include "bfloat16_dev.hpp"
+#include "miopen_cstdint.hpp"
 
 // "Constant" address space for kernel parameter
 #define CONSTANT __attribute__((address_space(4)))
diff --git a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp
index 4d21f91e6a..01ae13a405 100644
--- a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp
@@ -3,21 +3,8 @@
 
 #include "statically_indexed_array.hpp"
 
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
-
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef float float_t;
-#include <limits> // std::numeric_limits
-
-#else
-#include <cstdint> // int8_t, int16_t
-#include <cmath>   // float_t
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_cstdint.hpp"
+#include "miopen_limits.hpp"
 
 namespace ck {
 
@@ -978,7 +965,7 @@ struct inner_product_with_conversion
         return acc;
     }
 
-    __device__ T operator()(float_t a, float_t b) const { return convert(a) * convert(b); }
+    __device__ T operator()(float a, float b) const { return convert(a) * convert(b); }
 
     __device__ T operator()(int8x4_t a, int8x4_t b) const
     {
diff --git a/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp b/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp
index 501e1bfc1c..30494214f8 100644
--- a/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp
@@ -1,6 +1,8 @@
 #ifndef CK_ENABLE_IF_HPP
 #define CK_ENABLE_IF_HPP
 
+#include "miopen_type_traits.hpp"
+
 namespace ck {
 
 template <bool B, typename T = void>
diff --git a/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp b/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp
index b7489016e9..174c697501 100644
--- a/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp
@@ -6,6 +6,7 @@
 #include "number.hpp"
 #include "type.hpp"
 #include "tuple.hpp"
+#include "miopen_cstdint.hpp"
 
 namespace ck {
 
diff --git a/src/composable_kernel/composable_kernel/include/utility/type.hpp b/src/composable_kernel/composable_kernel/include/utility/type.hpp
index 4e5d4e5134..17769d52c0 100644
--- a/src/composable_kernel/composable_kernel/include/utility/type.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/type.hpp
@@ -3,84 +3,7 @@
 
 #include "integral_constant.hpp"
 #include "enable_if.hpp"
-
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// We need <type_traits> for std::remove_reference and std::remove_cv.
-/// But <type_traits> also defines std::true_type, per Standard.
-/// However the latter definition conflicts with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
-/// which defines std::true_type as well (which is wrong).
-
-namespace std {
-
-template <class T>
-struct remove_reference
-{
-    typedef T type;
-};
-template <class T>
-struct remove_reference<T&>
-{
-    typedef T type;
-};
-template <class T>
-struct remove_reference<T&&>
-{
-    typedef T type;
-};
-
-template <class T>
-using remove_reference_t = typename remove_reference<T>::type;
-
-template <class T>
-struct remove_const
-{
-    typedef T type;
-};
-template <class T>
-struct remove_const<const T>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_volatile
-{
-    typedef T type;
-};
-template <class T>
-struct remove_volatile<volatile T>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_cv
-{
-    typedef typename remove_volatile<typename remove_const<T>::type>::type type;
-};
-
-template <class T>
-struct is_pointer_helper : std::false_type
-{
-};
-
-template <class T>
-struct is_pointer_helper<T*> : std::true_type
-{
-};
-
-template <class T>
-struct is_pointer : is_pointer_helper<typename std::remove_cv<T>::type>
-{
-};
-
-} // namespace std
-#else
-#include <type_traits> // std::remove_reference, std::remove_cv, is_pointer
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_type_traits.hpp"
 
 namespace ck {
 
diff --git a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
index 52d00346cf..02e8da33a9 100644
--- a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
+++ b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
@@ -30,7 +30,7 @@
 extern "C" {
 #endif
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define EXECUTION_SPECIFIER __device__
 #else
 #define EXECUTION_SPECIFIER
@@ -43,7 +43,7 @@ typedef union
 
 // Composable kernels are written in HIP language. The language doesnt support
 // ushort2.hi or ushort2.low.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     ushort ushortvec[2];
 #endif // MIOPEN_BACKEND_HIP
     float f32;
@@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
 {
     cvt_bf16_fp32_t target_val;
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     target_val.ushortx2 = make_ushort2(0, src_val);
 #else
     target_val.ushortx2 = (ushort2)(0, src_val);
@@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
 // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
 // incrementing it causes it to become an exponent of 0xFF and a mantissa
 // of 0x00, which is Inf, the next higher value to the unrounded value.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
         target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
 #else
         target_val.u32 +=
@@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
 #endif // MIOPEN_USE_RNE_BFLOAT16
     }
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     return target_val.ushortvec[1];
 #else
     return target_val.ushortx2.hi;
diff --git a/src/convolution.cpp b/src/convolution.cpp
index ac7c28fdc4..3c8e36e43a 100644
--- a/src/convolution.cpp
+++ b/src/convolution.cpp
@@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor&
         }
     }
 
-    std::size_t out_c;
+    std::size_t out_c = 0;
     std::vector<std::size_t> out_lens(spatial_dim + 2);
 
     auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim);
diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp
index fad06870ce..3d49d78bbb 100644
--- a/src/gemm_v2.cpp
+++ b/src/gemm_v2.cpp
@@ -104,8 +104,6 @@ static inline rocblas_computetype rocBlasComputeType_ex3(const miopen::GemmDescr
 
 static inline rocblas_datatype rocBlasComputeType(const miopen::GemmDescriptor& desc)
 {
-    // Complex compute types are only supported in newer version of the API
-    assert(desc.dataType == desc.a_cast_type && desc.dataType == desc.b_cast_type);
     if(desc.dataType == miopenInt8)
         return rocblas_datatype::rocblas_datatype_i32_r;
     else
diff --git a/src/include/miopen/hip_f8_impl.hpp b/src/include/miopen/hip_f8_impl.hpp
deleted file mode 120000
index 22052778a0..0000000000
--- a/src/include/miopen/hip_f8_impl.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../kernels/hip_f8_impl.hpp
\ No newline at end of file
diff --git a/src/include/miopen/hip_float8.hpp b/src/include/miopen/hip_float8.hpp
deleted file mode 120000
index 5e16a70c91..0000000000
--- a/src/include/miopen/hip_float8.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../kernels/hip_float8.hpp
\ No newline at end of file
diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp
index 827f4d1397..036da97728 100644
--- a/src/kernels/MIOpenCheckNumerics.cpp
+++ b/src/kernels/MIOpenCheckNumerics.cpp
@@ -30,51 +30,11 @@
 
 #include <hip/hip_bfloat16.h>
 
-// Copied over from naive_conv.cpp
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
-
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef unsigned short uint16_t;
-typedef float float_t;
-
-// std::conditional requires type_traits which has a few other things
-// which result in collision with amd_hip_vector_types.h
-
-namespace std {
-template <bool predicate, typename X, typename Y>
-struct conditional;
-
-template <typename X, typename Y>
-struct conditional<true, X, Y>
-{
-    using type = X;
-};
-
-template <typename X, typename Y>
-struct conditional<false, X, Y>
-{
-    using type = Y;
-};
-
-template <bool predicate, typename X, typename Y>
-using conditional_t = typename conditional<predicate, X, Y>::type;
-} // namespace std
-#else
-#include <cstdint> // int8_t, int16_t
-#include <cmath>   // float_t
-#endif
-#endif // __HIPCC_RTC__
-
-#include <limits> // std::numeric_limits
-
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
 
+#include "miopen_limits.hpp"
+
 struct Numerics
 {
     float sum;
diff --git a/src/kernels/MIOpenLayerNorm.cpp b/src/kernels/MIOpenLayerNorm.cpp
index 58891d6538..bad09c563e 100644
--- a/src/kernels/MIOpenLayerNorm.cpp
+++ b/src/kernels/MIOpenLayerNorm.cpp
@@ -25,6 +25,12 @@
  *******************************************************************************/
 #ifdef MIOPEN_BETA_API
 
+#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
+#include <hip/hip_fp16.h>
+#include <hip/hip_runtime.h>
+#endif
+
+#include "miopen_cstdint.hpp"
 #include "float_types.h"
 
 //#if MIOPEN_USE_BFP16 == 1
diff --git a/src/kernels/bfloat16_dev.hpp b/src/kernels/bfloat16_dev.hpp
index c1a77c90db..2244385686 100644
--- a/src/kernels/bfloat16_dev.hpp
+++ b/src/kernels/bfloat16_dev.hpp
@@ -30,7 +30,7 @@
 extern "C" {
 #endif
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define EXECUTION_SPECIFIER __device__
 #else
 #define EXECUTION_SPECIFIER
@@ -43,7 +43,7 @@ typedef union cvt_bf16_fp32
 
 // Composable kernels are written in HIP language. The language doesnt support
 // ushort2.hi or ushort2.low.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     ushort ushortvec[2];
 #endif // MIOPEN_BACKEND_HIP
     float f32;
@@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
 {
     cvt_bf16_fp32_t target_val;
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     target_val.ushortx2 = make_ushort2(0, src_val);
 #else
     target_val.ushortx2 = (ushort2)(0, src_val);
@@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
 // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
 // incrementing it causes it to become an exponent of 0xFF and a mantissa
 // of 0x00, which is Inf, the next higher value to the unrounded value.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
         target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
 #else
         target_val.u32 +=
@@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
 #endif // MIOPEN_USE_RNE_BFLOAT16
     }
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     return target_val.ushortvec[1];
 #else
     return target_val.ushortx2.hi;
diff --git a/src/kernels/float_types.h b/src/kernels/float_types.h
index 5406ba85ec..beded11d8d 100644
--- a/src/kernels/float_types.h
+++ b/src/kernels/float_types.h
@@ -34,7 +34,7 @@
 #define FOUR 4
 #define EIGHT 8
 #if MIOPEN_USE_FP8 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT hip_f8<miopen_f8::hip_f8_type::fp8>
 #define FLOAT_ACCUM float
 // HIP implements the correct operators for conversion
@@ -58,7 +58,7 @@
 #endif // MIOPEN_USE_FP8
 
 #if MIOPEN_USE_BFP8 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT hip_f8<miopen_f8::hip_f8_type::bf8>
 #define FLOAT_ACCUM float
 #else
@@ -79,7 +79,7 @@
 // #endif
 #endif // MIOPEN_USE_BFP8
 
-#ifndef __HIP_PLATFORM_HCC__
+#ifndef __HIP_PLATFORM_AMD__
 #define _FLOAT2 PPCAT(_FLOAT, TWO)
 #define _FLOAT4 PPCAT(_FLOAT, FOUR)
 #define _FLOAT8 PPCAT(_FLOAT, EIGHT)
@@ -99,19 +99,19 @@
 #endif
 
 #if MIOPEN_USE_DOUBLE_ACCUM
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT_ACCUM double
 #else
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 #define _FLOAT_ACCUM double
-#endif // __HIP_PLATFORM_HCC__
+#endif // __HIP_PLATFORM_AMD__
 #define MAX_VAL_ACCUM DBL_MAX
 #else // MIOPEN_USE_DOUBLE_ACCUM
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT_ACCUM float
 #else
 #define _FLOAT_ACCUM float
-#endif // __HIP_PLATFORM_HCC__
+#endif // __HIP_PLATFORM_AMD__
 #ifndef FLT_MAX
 #define MAX_VAL_ACCUM 3.402823466e+38F
 #else
@@ -120,12 +120,12 @@
 #endif // MIOPEN_USE_DOUBLE_ACCUM
 
 #if MIOPEN_USE_FP16 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT _Float16
-#else // __HIP_PLATFORM_HCC__
+#else // __HIP_PLATFORM_AMD__
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 #define _FLOAT half
-#endif // __HIP_PLATFORM_HCC__
+#endif // __HIP_PLATFORM_AMD__
 #define SIZEOF_FLOAT 2
 // Max value for the main datatype
 #ifndef HALF_MAX
@@ -136,11 +136,11 @@
 #endif // MIOPEN_USE_FP16
 
 #if MIOPEN_USE_FP32 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT float
 #else
 #define _FLOAT float
-#endif // __HIP_PLATFORM_HCC__
+#endif // __HIP_PLATFORM_AMD__
 #define SIZEOF_FLOAT 4
 // Max value for the main datatype
 #ifndef FLT_MAX
@@ -151,7 +151,7 @@
 #endif // MIOPEN_USE_FP32
 
 #if MIOPEN_USE_BFP16 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT ushort
 #else
 #define _FLOAT ushort
@@ -162,7 +162,7 @@
 #endif // MIOPEN_USE_BFP16
 
 #if MIOPEN_USE_FP16 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
 #define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
 #define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
@@ -188,7 +188,7 @@
 /// refactoring should be considered as nontrivial and requires
 /// a separate PR. Let's keep this historical stuff for now.
 /// --atamazov 30.08.2023
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
 #define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
 #define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
@@ -202,7 +202,7 @@
 #endif // MIOPEN_USE_FP32
 
 #if MIOPEN_USE_BFP16 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define CVT_FLOAT2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
 #define CVT_ACCUM2FLOAT(x) MIOPEN_ERROR_NOT_IMLEMENTED
 #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
@@ -232,7 +232,7 @@
 #endif
 
 #if MIOPEN_USE_NATIVE_DATATYPE_ACCUM
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #undef FLOAT_ACCUM
 #define FLOAT_ACCUM MIOPEN_ERROR_NOT_IMLEMENTED
 #else
@@ -250,7 +250,7 @@
 #define CVT_FP32_2ACCUM(x) (CVT_FP32_2FLOAT(x))
 
 #undef CVT_INTEGRAL2ACCUM
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
 #else
 #if MIOPEN_USE_BFP16 == 1
diff --git a/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp b/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp
index d1217c0f69..587b0b4191 100644
--- a/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp
+++ b/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp
@@ -28,6 +28,8 @@
 #include <hip/hip_fp16.h>
 #endif
 
+#include "miopen_cstdint.hpp"
+
 #ifndef BATCHED_TRANSPOSE_OCCUPANCY
 #define BATCHED_TRANSPOSE_OCCUPANCY 4
 #endif
diff --git a/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp b/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp
index c88fa3cbc6..a6e917c855 100644
--- a/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp
+++ b/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp
@@ -24,34 +24,23 @@
  *
  *******************************************************************************/
 #ifndef GENERAL_TENSOR_REORDER_UTIL_HPP
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
-
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef unsigned int uint32_t;
-
-#else
-#include <cstdint> // int8_t, int16_t
-#endif
-#endif // __HIPCC_RTC__
+#define GENERAL_TENSOR_REORDER_UTIL_HPP
 
 #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
 #include <hip/hip_runtime.h>
 #include <hip/hip_fp16.h>
 #endif
 
+#include "miopen_cstdint.hpp"
+
 #ifndef TENSOR_REORDER_OCCUPANCY
 #define TENSOR_REORDER_OCCUPANCY 4
 #endif
-#define GENERAL_TENSOR_REORDER_UTIL_HPP
 
 template <int... Is>
 struct order
 {
-    static constexpr std::size_t m_size = sizeof...(Is);
+    static constexpr size_t m_size = sizeof...(Is);
     // the last dummy element is to prevent compiler complain about empty array, when mSize = 0
     static constexpr int m_data[m_size + 1] = {Is..., 0};
 
diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
index 3b4eabecfb..724ef3d5af 100644
--- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
@@ -30,47 +30,9 @@
 #include <hip/hip_bfloat16.h>
 #endif
 
-// Copied over from naive_conv.cpp
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
-
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef unsigned short uint16_t;
-typedef float float_t;
-
-// std::conditional requires type_traits which has a few other things
-// which result in collition with amd_hip_vector_types.h
-
-namespace std {
-template <bool predicate, typename X, typename Y>
-struct conditional;
-
-template <typename X, typename Y>
-struct conditional<true, X, Y>
-{
-    using type = X;
-};
-
-template <typename X, typename Y>
-struct conditional<false, X, Y>
-{
-    using type = Y;
-};
-
-template <bool predicate, typename X, typename Y>
-using conditional_t = typename conditional<predicate, X, Y>::type;
-} // namespace std
-#else
-#include <cstdint> // int8_t, int16_t
-#include <cmath>   // float_t
-#endif
-#endif // __HIPCC_RTC__
-
-#include <limits> // std::numeric_limits
+#include "miopen_cstdint.hpp"
+#include "miopen_type_traits.hpp"
+#include "miopen_limits.hpp"
 
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
diff --git a/src/kernels/gpu_reference_kernel/naive_conv.cpp b/src/kernels/gpu_reference_kernel/naive_conv.cpp
index 125eff94f3..d9a6c133d3 100644
--- a/src/kernels/gpu_reference_kernel/naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/naive_conv.cpp
@@ -28,23 +28,8 @@
 #include <hip/hip_runtime.h>
 #endif
 
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
-
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef float float_t;
-#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
-#include <limits> // std::numeric_limits
-#endif
-
-#else
-#include <cstdint> // int8_t, int16_t
-#include <cmath>   // float_t
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_cstdint.hpp"
+#include "miopen_limits.hpp"
 
 #include "stride_array.hpp"
 
diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp
index c7a62f9f72..9cb63ec0c6 100644
--- a/src/kernels/hip_f8_impl.hpp
+++ b/src/kernels/hip_f8_impl.hpp
@@ -25,9 +25,13 @@
  *******************************************************************************/
 // #include <miopen/bfloat16.hpp>
 // #include <half.hpp>
+
+#include "miopen_cstdint.hpp"
+#include "miopen_type_traits.hpp"
+
 namespace miopen_hip_f8_impl {
 
-#ifndef __HIP_PLATFORM_HCC__
+#ifndef __HIP_PLATFORM_AMD__
 using hip_bfloat16 = bfloat16;
 using half         = half_float::half;
 #endif
@@ -87,8 +91,8 @@ MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8_no_range_reduce(T _x,
 template <int wm, int we, typename T, bool negative_zero_nan, bool clip>
 MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch, uint32_t rng)
 {
-    constexpr bool is_half  = std::is_same<T, half>::value;
-    constexpr bool is_float = std::is_same<T, float>::value;
+    constexpr bool is_half  = __is_same_as(T, half);
+    constexpr bool is_float = __is_same_as(T, float);
     static_assert(wm + we == 7, "wm+we==7");
     static_assert(is_half || is_float, "Only half and float can be cast to f8");
 
@@ -272,8 +276,8 @@ MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch, uint32_t rng)
 template <int wm, int we, typename T, bool negative_zero_nan>
 MIOPEN_HIP_HOST_DEVICE T cast_from_f8(uint8_t x)
 {
-    constexpr bool is_half  = std::is_same<T, half>::value;
-    constexpr bool is_float = std::is_same<T, float>::value;
+    constexpr bool is_half  = __is_same_as(T, half);
+    constexpr bool is_float = __is_same_as(T, float);
     static_assert(is_half || is_float, "only half and float are supported");
 
     constexpr int weo = is_half ? 5 : 8;
diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp
index dd57c9ca5b..e45c616d31 100644
--- a/src/kernels/hip_float8.hpp
+++ b/src/kernels/hip_float8.hpp
@@ -24,12 +24,16 @@
  *
  *******************************************************************************/
 #pragma once
+
+#include "miopen_cstdint.hpp"
+
 #ifndef MIOPEN_ENABLE_F8_DEVICE_CODE
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 0
 #endif
 
 // FP8 header version 0.4, 2021/05/11
-#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE
+// Updated by atamazov 2023/12/22
+#if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE
 // MIOpen by default does not have device code in the regular compilation paths,
 // therefore, when this file is used from the host side, compilation takes much
 // longer. By guarding the __device__ directive we can control that such compilation
@@ -83,6 +87,9 @@ inline MIOPEN_HIP_HOST_DEVICE bool get_hip_f8_bias_mode()
 #endif
 }
 
+template <typename T>
+class numeric_limits;
+
 template <hip_f8_type T>
 struct hip_f8
 {
@@ -262,8 +269,7 @@ struct hip_f8
 
     inline MIOPEN_HIP_HOST_DEVICE bool operator==(const hip_f8& rhs) const
     {
-        if((rhs.is_zero() && this->is_zero()) ||
-           (fabs(rhs - *this) < std::numeric_limits<hip_f8<T>>::epsilon()))
+        if((rhs.is_zero() && this->is_zero()) || (this->data == rhs.data))
             return true;
         else if(rhs.is_nan() || rhs.is_inf() || this->is_nan() || this->is_inf())
             return false;
@@ -483,19 +489,6 @@ MIOPEN_HIP_HOST_DEVICE T F8_Max()
     x.bits = 0x7F;
     return x.value;
 }
-} // namespace miopen_f8
-
-// define numeric limits for the new data type
-namespace std {
-inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> x) // NOLINT
-{
-    return x.is_inf();
-}
-
-inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> x) // NOLINT
-{
-    return x.is_inf();
-}
 
 template <>
 class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
@@ -517,11 +510,14 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
         return miopen_f8::F8_Max<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>();
     }
 
+    /// \todo This is wrong. min() should minimum normalized positive value.
     static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> min()
     {
         return static_cast<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>(-1.0f) *
                miopen_f8::F8_Max<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>();
     }
+
+    static constexpr int digits = 4;
 };
 
 template <>
@@ -544,14 +540,69 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
         return static_cast<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>(
             miopen_f8::F8_Max<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>());
     }
+
+    /// \todo This is wrong. min() should minimum normalized positive value.
     static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> min()
     {
         return static_cast<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>(-1.0f) *
                miopen_f8::F8_Max<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>();
     }
+
+    static constexpr int digits = 3;
+};
+
+} // namespace miopen_f8
+
+#ifdef __HIPCC_RTC__
+// Assume that if hipRTC is used, then we get <cmath> for F8
+// from the precompiled header.
+#else
+// NOLINTBEGIN(cert-dcl58-cpp)
+namespace std {
+inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> x) // NOLINT
+{
+    return x.is_inf();
+}
+
+inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> x) // NOLINT
+{
+    return x.is_inf();
+}
+
+inline bool isnan(miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> x) // NOLINT
+{
+    return x.is_nan();
+}
+
+inline bool isnan(miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> x) // NOLINT
+{
+    return x.is_nan();
+}
+
+} // namespace std
+  // NOLINTEND(cert-dcl58-cpp)
+#endif
+
+// NOLINTBEGIN(cert-dcl58-cpp)
+namespace std {
+
+template <typename T>
+class numeric_limits;
+
+template <>
+class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
+    : public miopen_f8::numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
+{
+};
+
+template <>
+class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
+    : public miopen_f8::numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
+{
 };
 
 } // namespace std
+// NOLINTEND(cert-dcl58-cpp)
 
 template <miopen_f8::hip_f8_type T>
 struct hip_f8x4
diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp
new file mode 100644
index 0000000000..752805244b
--- /dev/null
+++ b/src/kernels/miopen_cstdint.hpp
@@ -0,0 +1,41 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+#if HIP_PACKAGE_VERSION_FLAT >= 6000024000ULL
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef __hip_internal::uint64_t uint64_t;
+#endif
+
+#else
+#include <cstdint> // int8_t, int16_t
+#endif
diff --git a/src/kernels/miopen_limits.hpp b/src/kernels/miopen_limits.hpp
new file mode 100644
index 0000000000..2a8f5e6178
--- /dev/null
+++ b/src/kernels/miopen_limits.hpp
@@ -0,0 +1,98 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#ifndef WORKAROUND_DO_NOT_USE_CUSTOM_LIMITS
+#define WORKAROUND_DO_NOT_USE_CUSTOM_LIMITS 0
+#endif
+
+#if defined(MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS) && !WORKAROUND_DONT_USE_CUSTOM_LIMITS
+
+#include <hip/hip_bfloat16.h>
+
+#define MIOPEN_ENABLE_F8_DEVICE_CODE 1
+#include "hip_float8.hpp"
+
+namespace std {
+
+template <typename T>
+class numeric_limits;
+
+template <>
+class numeric_limits<float>
+{
+public:
+    static constexpr __device__ float max() noexcept { return 0x1.FFFFFEp+127f; }
+
+    static constexpr __device__ float min() noexcept { return 0x1p-126f; }
+};
+
+template <>
+class numeric_limits<_Float16>
+{
+public:
+    static constexpr __device__ _Float16 max() noexcept
+    {
+        return static_cast<_Float16>(0x1.FFCp+15f);
+    }
+
+    static constexpr __device__ _Float16 min() noexcept { return static_cast<_Float16>(0x1p-14f); }
+};
+
+template <>
+class numeric_limits<hip_bfloat16>
+{
+public:
+    static
+#if HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL
+        constexpr
+#endif
+        __device__ hip_bfloat16
+        max() noexcept
+    {
+        // data = 0x7F7F
+        return static_cast<hip_bfloat16>(0x1.FEp+127f);
+    }
+
+    static
+#if HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL
+        constexpr
+#endif
+        __device__ hip_bfloat16
+        min() noexcept
+    {
+        // data = 0x0080
+        return static_cast<hip_bfloat16>(0x1p-14f);
+    }
+};
+
+} // namespace std
+
+#else
+
+#include <limits>
+
+#endif
diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp
new file mode 100644
index 0000000000..b9cf11cd63
--- /dev/null
+++ b/src/kernels/miopen_type_traits.hpp
@@ -0,0 +1,148 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
+
+namespace std {
+
+template <class T>
+struct remove_reference
+{
+    typedef T type;
+};
+template <class T>
+struct remove_reference<T&>
+{
+    typedef T type;
+};
+template <class T>
+struct remove_reference<T&&>
+{
+    typedef T type;
+};
+
+template <class T>
+using remove_reference_t = typename remove_reference<T>::type;
+
+template <class T>
+struct remove_const
+{
+    typedef T type;
+};
+template <class T>
+struct remove_const<const T>
+{
+    typedef T type;
+};
+
+template <class T>
+struct remove_volatile
+{
+    typedef T type;
+};
+template <class T>
+struct remove_volatile<volatile T>
+{
+    typedef T type;
+};
+
+template <class T>
+struct remove_cv
+{
+    typedef typename remove_volatile<typename remove_const<T>::type>::type type;
+};
+
+#if HIP_PACKAGE_VERSION_FLAT >= 6000024000ULL
+template <class T, T v>
+struct integral_constant
+{
+    static constexpr T value = v;
+    using value_type         = T;
+    using type               = integral_constant;
+    constexpr operator value_type() const noexcept { return value; }
+    constexpr value_type operator()() const noexcept { return value; }
+};
+
+using true_type  = integral_constant<bool, true>;
+using false_type = integral_constant<bool, false>;
+
+template <class T, class U>
+struct is_same : false_type
+{
+};
+
+template <class T>
+struct is_same<T, T> : true_type
+{
+};
+
+template <bool B, typename T = void>
+using enable_if = __hip_internal::enable_if<B, T>;
+
+template <bool B, typename T = void>
+using enable_if_t = typename __hip_internal::enable_if<B, T>::type;
+#endif
+
+template <class T>
+struct is_pointer_helper : false_type
+{
+};
+
+template <class T>
+struct is_pointer_helper<T*> : true_type
+{
+};
+
+template <class T>
+struct is_pointer : is_pointer_helper<typename remove_cv<T>::type>
+{
+};
+
+template <bool predicate, typename X, typename Y>
+struct conditional;
+
+template <typename X, typename Y>
+struct conditional<true, X, Y>
+{
+    using type = X;
+};
+
+template <typename X, typename Y>
+struct conditional<false, X, Y>
+{
+    using type = Y;
+};
+
+template <bool predicate, typename X, typename Y>
+using conditional_t = typename conditional<predicate, X, Y>::type;
+
+} // namespace std
+#else
+
+#include <type_traits> // std::remove_reference, std::remove_cv, is_pointer
+
+#endif
diff --git a/src/kernels/miopen_utility.hpp b/src/kernels/miopen_utility.hpp
new file mode 100644
index 0000000000..584ffad278
--- /dev/null
+++ b/src/kernels/miopen_utility.hpp
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
+
+#include "miopen_type_traits.hpp" // std::remove_reference
+
+namespace std {
+
+template <typename T>
+constexpr T&& forward(typename remove_reference<T>::type& t_) noexcept
+{
+    return static_cast<T&&>(t_);
+}
+
+template <typename T>
+constexpr T&& forward(typename remove_reference<T>::type&& t_) noexcept
+{
+    return static_cast<T&&>(t_);
+}
+
+} // namespace std
+
+#else
+
+#include <utility>
+
+#endif
diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp
index 19cf75624b..88036f8f4d 100644
--- a/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp
+++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp
@@ -2,66 +2,7 @@
 #define CK_UTILS_TYPE_HPP
 
 #include "static_kernel_integral_constant.hpp"
-
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// We need <type_traits> for std::remove_reference and std::remove_cv.
-/// But <type_traits> also defines std::true_type, per Standard.
-/// However the latter definition conflicts with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
-/// which defines std::true_type as well (which is wrong).
-
-namespace std {
-
-template <class T>
-struct remove_reference
-{
-    typedef T type;
-};
-template <class T>
-struct remove_reference<T&>
-{
-    typedef T type;
-};
-template <class T>
-struct remove_reference<T&&>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_const
-{
-    typedef T type;
-};
-template <class T>
-struct remove_const<const T>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_volatile
-{
-    typedef T type;
-};
-template <class T>
-struct remove_volatile<volatile T>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_cv
-{
-    typedef typename remove_volatile<typename remove_const<T>::type>::type type;
-};
-
-} // namespace std
-#else
-#include <type_traits> // std::remove_reference, std::remove_cv
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_type_traits.hpp"
 
 namespace ck {
 
diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_config.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_config.hpp
index b4c9c2806d..d1a883b2ca 100644
--- a/src/kernels/static_composable_kernel/include/utility/static_kernel_config.hpp
+++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_config.hpp
@@ -5,6 +5,7 @@
 #include "hip/hip_runtime.h"
 #include "hip/hip_fp16.h"
 #endif
+#include "miopen_cstdint.hpp"
 #include "bfloat16_dev.hpp"
 
 // index type: unsigned or signed
diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp
index e996e7a8b0..87e830954c 100644
--- a/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp
+++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp
@@ -26,9 +26,7 @@
 #ifndef CK_REDUCTION_OPERATOR_HPP
 #define CK_REDUCTION_OPERATOR_HPP
 
-#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
-#include <limits>
-#endif
+#include "miopen_limits.hpp"
 #include "static_kernel_reduction_common.hpp"
 
 namespace ck {
diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp
index 1118550815..343b9d388d 100644
--- a/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp
+++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp
@@ -5,34 +5,7 @@
 #include "static_kernel_ck_utils_type.hpp"
 #include "static_kernel_sequence.hpp"
 
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// We need <utility> for std::forward. In some cases, it includes <type_traits>
-/// (this is against the Standard, but it doesn't matter in this case).
-/// But <type_traits> also defines std::true_type, per Standard.
-/// However the latter definition conflicts with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
-/// which defines std::true_type as well (which is wrong).
-
-namespace std {
-
-template <typename T>
-constexpr T&& forward(typename remove_reference<T>::type& t_) noexcept
-{
-    return static_cast<T&&>(t_);
-}
-
-template <typename T>
-constexpr T&& forward(typename remove_reference<T>::type&& t_) noexcept
-{
-    return static_cast<T&&>(t_);
-}
-
-} // namespace std
-#else
-#include <utility> // std::forward
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_utility.hpp" // std::forward
 
 namespace ck {
 
diff --git a/src/kernels/stride_array.hpp b/src/kernels/stride_array.hpp
index 32cb1f85b6..95c86fc79e 100644
--- a/src/kernels/stride_array.hpp
+++ b/src/kernels/stride_array.hpp
@@ -25,12 +25,6 @@
  *******************************************************************************/
 #pragma once
 
-#ifdef __HIPCC_RTC__
-#ifndef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-#include <cstdint>
-#endif
-#endif // __HIPCC_RTC__
-
 /// \todo Uncomment when hip RTC accepts std::array -- amberhassaan
 // #include <hip/amd_detail/amd_hip_vector_types.h>
 // using StrideIndexType = int;
diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp
index 8dce8f364e..e04dc5c2ea 100644
--- a/src/rnn_api.cpp
+++ b/src/rnn_api.cpp
@@ -529,7 +529,7 @@ static void LogCmdRNN(const miopenTensorDescriptor_t* xDesc,
                       const int seqLength,
                       const RNNDir_t dir)
 {
-    if(miopen::IsLoggingCmd())
+    if(miopen::IsLoggingCmd() && seqLength > 0)
     {
         std::string mode;
         miopenRNNMode_t rnnMode = miopen::deref(rnnDesc).rnnMode;
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 030adb196d..44f6462a61 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -175,6 +175,10 @@ message(STATUS "MIOPEN_TEST_GPU_XNACK_ENABLED ${MIOPEN_TEST_GPU_XNACK_ENABLED}")
 message(STATUS "MIOPEN_TEST_GPU_DETECTION_FAILED ${MIOPEN_TEST_GPU_DETECTION_FAILED}")
 message(STATUS "MIOPEN_TEST_WITH_MIOPENDRIVER ${MIOPEN_TEST_WITH_MIOPENDRIVER}")
 message(STATUS "MIOPEN_TEST_MLIR ${MIOPEN_TEST_MLIR}")
+message(STATUS "MIOPEN_TEST_CONV ${MIOPEN_TEST_CONV}")
+message(STATUS "MIOPEN_TEST_DEEPBENCH ${MIOPEN_TEST_DEEPBENCH}")
+message(STATUS "MIOPEN_TEST_DRIVER_ITER_MODE ${MIOPEN_TEST_DRIVER_ITER_MODE}")
+message(STATUS "MIOPEN_TEST_COMPOSABLEKERNEL ${MIOPEN_TEST_COMPOSABLEKERNEL}")
 
 if(MIOPEN_TEST_DRIVER_ITER_MODE)
     add_definitions(-DMIOPEN_TEST_DRIVER_MODE=2)
@@ -230,6 +234,7 @@ message(STATUS "MIOPEN_TEST_FLOAT ${MIOPEN_TEST_FLOAT}")
 message(STATUS "MIOPEN_TEST_HALF ${MIOPEN_TEST_HALF}")
 message(STATUS "MIOPEN_TEST_BFLOAT16 ${MIOPEN_TEST_BFLOAT16}")
 message(STATUS "MIOPEN_TEST_INT8 ${MIOPEN_TEST_INT8}")
+message(STATUS "MIOPEN_TEST_ALL ${MIOPEN_TEST_ALL}")
 
 set_var_to_condition(WORKAROUND_ISSUE_1187_DEFAULT MIOPEN_TEST_GFX90A AND MIOPEN_TEST_FLOAT)
 option( WORKAROUND_ISSUE_1187 "" ${WORKAROUND_ISSUE_1187_DEFAULT})
@@ -372,9 +377,10 @@ function(add_test_executable TEST_NAME)
     endif()
     if(NOT MIOPEN_EMBED_DB STREQUAL "")
         target_link_libraries(${TEST_NAME} MIOpen miopen_data)
-    else()
+    else()       
         target_link_libraries(${TEST_NAME} MIOpen)
     endif()
+    target_include_directories(${TEST_NAME} PRIVATE ../src/kernels)
 endfunction(add_test_executable)
 
 set(MIOPEN_TEST_SANITIZERS)
diff --git a/test/cpu_conv.hpp b/test/cpu_conv.hpp
index 560c867c6b..f9001ef667 100644
--- a/test/cpu_conv.hpp
+++ b/test/cpu_conv.hpp
@@ -39,7 +39,7 @@
 #include "tensor_holder.hpp"
 #include <miopen/stringutils.hpp>
 #include <miopen/functional.hpp>
-#include <miopen/hip_float8.hpp>
+#include <hip_float8.hpp>
 
 template <class T, class... Ts>
 static constexpr auto make_array(T x, Ts... xs)
diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt
index 4b7f2aaf08..1a15935192 100644
--- a/test/gtest/CMakeLists.txt
+++ b/test/gtest/CMakeLists.txt
@@ -24,7 +24,7 @@ function(add_gtest TEST_NAME)
     add_dependencies(tests test_${TEST_NAME})
     add_dependencies(check test_${TEST_NAME})
     target_compile_options(test_${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef)
-    target_include_directories(test_${TEST_NAME} PRIVATE ../)
+    target_include_directories(test_${TEST_NAME} PRIVATE ../ ../../src/kernels)
     if(MIOPEN_ENABLE_AI_KERNEL_TUNING)
       target_include_directories(test_${TEST_NAME} SYSTEM PRIVATE $<BUILD_INTERFACE:${FDEEP_INCLUDE_DIR}>)
       target_include_directories(test_${TEST_NAME} SYSTEM PRIVATE $<BUILD_INTERFACE:${EIGEN_INCLUDE_DIR}/eigen3>)
@@ -35,7 +35,8 @@ function(add_gtest TEST_NAME)
       target_link_libraries(test_${TEST_NAME} gtest_main MIOpen ${Boost_LIBRARIES} hip::host $<BUILD_INTERFACE:roc::rocblas>)
     endif()
     # Enable CMake to discover the test binary
-    gtest_discover_tests(test_${TEST_NAME} PROPERTIES ENVIRONMENT "MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR};MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG};MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL};MIOPEN_TEST_MLIR=${MIOPEN_TEST_MLIR};MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL}")
+    # Extend GTest DISCOVERY_TIMEOUT to 5 mins
+    gtest_discover_tests(test_${TEST_NAME} DISCOVERY_TIMEOUT 300 PROPERTIES ENVIRONMENT "MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR};MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG};MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL};MIOPEN_TEST_MLIR=${MIOPEN_TEST_MLIR};MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL}")
 
   endif()
 endfunction()
@@ -50,3 +51,9 @@ foreach(TEST ${TESTS})
     get_filename_component(BASE_NAME ${TEST} NAME_WE)
     add_gtest(${BASE_NAME})
 endforeach()
+
+message(STATUS "gtest env: MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR}")
+message(STATUS "gtest env: MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG}")
+message(STATUS "gtest env: MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL}")
+message(STATUS "gtest env: MIOPEN_TEST_MLIR=${MIOPEN_TEST_MLIR}")
+message(STATUS "gtest env: MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL}")
diff --git a/test/gtest/solver_bwd.hpp b/test/gtest/solver_bwd.hpp
index febc35ae01..d99d149a94 100644
--- a/test/gtest/solver_bwd.hpp
+++ b/test/gtest/solver_bwd.hpp
@@ -32,7 +32,7 @@
 #include <fusionHost.hpp>
 #include <miopen/conv/data_invoke_params.hpp>
 
-#include <miopen/hip_float8.hpp>
+#include <hip_float8.hpp>
 #include <miopen/type_name.hpp>
 #include <miopen/rank.hpp>
 
diff --git a/test/gtest/solver_fwd.hpp b/test/gtest/solver_fwd.hpp
index a41667d5fc..a78f65dc04 100644
--- a/test/gtest/solver_fwd.hpp
+++ b/test/gtest/solver_fwd.hpp
@@ -32,7 +32,7 @@
 #include <fusionHost.hpp>
 #include <miopen/conv/data_invoke_params.hpp>
 
-#include <miopen/hip_float8.hpp>
+#include <hip_float8.hpp>
 #include <miopen/type_name.hpp>
 #include <miopen/rank.hpp>
 
diff --git a/test/gtest/solver_wrw.hpp b/test/gtest/solver_wrw.hpp
index 6adba65bf6..a3268dbe2e 100644
--- a/test/gtest/solver_wrw.hpp
+++ b/test/gtest/solver_wrw.hpp
@@ -32,7 +32,7 @@
 #include <fusionHost.hpp>
 #include <miopen/conv/wrw_invoke_params.hpp>
 
-#include <miopen/hip_float8.hpp>
+#include <hip_float8.hpp>
 #include <miopen/type_name.hpp>
 #include <miopen/rank.hpp>
 
diff --git a/test/handle_test.cpp b/test/handle_test.cpp
index 1ab319d6fb..4abc08edcc 100644
--- a/test/handle_test.cpp
+++ b/test/handle_test.cpp
@@ -28,6 +28,14 @@
 /// \todo Create dedicated ticket and rename macro.
 #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1
 
+// https://gerrit-git.amd.com/c/compute/ec/clr/+/972441
+// "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not
+// yet fixed in the compiler. In order to test such release candidates, we have to
+// override HIP version to 6.1.0.
+#define WORKAROUND_ISSUE_2600                                                                    \
+    ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT <= 6000999999ULL) || \
+     HIP_PACKAGE_VERSION_FLAT == 6001000000ULL)
+
 #include <miopen/config.h>
 #include <miopen/handle.hpp>
 #include <miopen/execution_context.hpp>
@@ -207,7 +215,7 @@ std::string WriteNop(kernel_type_t kern_type)
 void test_warnings(kernel_type_t kern_type)
 {
     auto&& h = get_handle();
-#if MIOPEN_BUILD_DEV
+#if MIOPEN_BUILD_DEV && !WORKAROUND_ISSUE_2600
     if(kern_type == miopenOpenCLKernelType)
         EXPECT(throws([&] {
             h.AddKernel("GEMM", "", WriteNop(kern_type), "write", {1, 1, 1}, {1, 1, 1}, "");
diff --git a/test/na_train.cpp b/test/na_train.cpp
index 24529058ed..80f3fc781f 100644
--- a/test/na_train.cpp
+++ b/test/na_train.cpp
@@ -802,7 +802,7 @@ struct na_fusion_driver : test_driver
 
         std::size_t input_n, input_c, input_h, input_w;
         std::tie(input_n, input_c, input_h, input_w) = miopen::tien<4>(input.desc.GetLengths());
-        this->tolerance                              = 80 * float(input.desc.GetElementSize());
+        this->tolerance                              = 80 * double(input.desc.GetElementSize());
         ptr_activdesc                                = GetManagedActivDesc();
         miopenSetActivationDescriptor(ptr_activdesc.get(), activ_mode, alpha, beta, gamma);
         auto&& handle = get_handle();
diff --git a/test/tensor_holder.hpp b/test/tensor_holder.hpp
index 3fda3b5cf7..7b013aa240 100644
--- a/test/tensor_holder.hpp
+++ b/test/tensor_holder.hpp
@@ -44,7 +44,7 @@
 #endif
 using half         = half_float::half;
 using hip_bfloat16 = bfloat16;
-#include <miopen/hip_float8.hpp>
+#include <hip_float8.hpp>
 using float8  = miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>;
 using bfloat8 = miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>;
 
diff --git a/test/verify.hpp b/test/verify.hpp
index 984237a48c..339eeeee76 100644
--- a/test/verify.hpp
+++ b/test/verify.hpp
@@ -36,7 +36,7 @@
 #include <miopen/bfloat16.hpp>
 using half         = half_float::half;
 using hip_bfloat16 = bfloat16;
-#include <miopen/hip_float8.hpp>
+#include <hip_float8.hpp>
 #include "tensor_holder.hpp"
 
 namespace miopen {