Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump MIOpen version to 3.1.0 and update CI docker #2519

Merged
merged 38 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
309309b
Update docker and miopen version
atamazov Nov 4, 2023
61e1721
update FIN to develop
atamazov Nov 4, 2023
58c4d97
update a few more requirements
junliume Nov 11, 2023
39572c0
overrisde existing installed files
junliume Nov 12, 2023
4a011d1
purge CK before installing new
junliume Nov 12, 2023
58f8d80
fix hip tidy issues
junliume Nov 13, 2023
7eefad5
add a few missing ones
junliume Nov 13, 2023
ed1a732
adopt review opinion
junliume Nov 13, 2023
8053a0c
Update CMakeLists.txt
junliume Nov 14, 2023
f958b46
fix issues typo and hiprtc header
junliume Nov 14, 2023
08a1ca3
Merge branch 'develop' into bump_version_fin
junliume Nov 14, 2023
699cf08
Keep base docker at Ubuntu 20.04
junliume Nov 17, 2023
3e847da
Revert limitations on limits header
junliume Nov 17, 2023
72eb180
add fixes
umangyadav Dec 5, 2023
10e9581
changes that works for MIGraphX
umangyadav Dec 5, 2023
f498789
rever changes for checknumerics
umangyadav Dec 5, 2023
8bc23eb
Merge branch 'develop' into fp8_fix
umangyadav Dec 5, 2023
69c0d99
Formatting
umangyadav Dec 5, 2023
aa09f36
Merge branch 'fp8_fix' into bump_version_fin
junliume Dec 5, 2023
cf02a64
update docker file
junliume Dec 5, 2023
ebeef1d
avoid ldd conflicts
junliume Dec 6, 2023
ee34b45
update CK commit hash
junliume Dec 6, 2023
dab5d3b
Merge branch 'develop' into bump_version_fin
junliume Dec 12, 2023
c6b4352
update dockerfile
junliume Dec 15, 2023
b35b497
Merge branch 'develop' into bump_version_fin
junliume Dec 15, 2023
9990a5d
update dockerfile
junliume Dec 16, 2023
5b00521
Merge branch 'develop' into bump_version_fin
junliume Dec 16, 2023
0133956
workaround build issues of CK
junliume Dec 17, 2023
dc55bba
fix the real issue in compiling rocMLIR
junliume Dec 17, 2023
0d48eb3
bump CK commit hash
junliume Dec 17, 2023
19b1a95
WA for Issue 2600 and turn on smoke tests by default
junliume Dec 17, 2023
592d5cb
ROCm 6.0 replaces all __HIP_PLATFORM_HCC__ with __HIP_PLATFORM_AMD__
junliume Dec 19, 2023
e74ed6d
Merge branch 'develop' into bump_version_fin
junliume Dec 19, 2023
f92cd4c
Update src/comgr.cpp
junliume Dec 19, 2023
b81d3a5
fix clang format issue
junliume Dec 19, 2023
c43de0c
Merge branch 'develop' into bump_version_fin
junliume Dec 19, 2023
b4d1236
Bump CK commit hash to d0f355a31a341b0a885ff65231781f332a20cc5f
junliume Dec 19, 2023
41c6172
Merge branch 'develop' into bump_version_fin
junliume Dec 20, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ if(NOT WIN32 AND NOT APPLE)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
endif()

rocm_setup_version(VERSION 3.00.0)
rocm_setup_version(VERSION 3.1.0)

list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake )
include(TargetFlags)
Expand Down Expand Up @@ -625,6 +625,19 @@ enable_cppcheck(
knownConditionTrueFalse
shadowFunction
moduloofone
###################################################################
# TODO Code Quality WORKAROUND ROCm 6.0 &&
# Ubuntu 22.04 && cppcheck 2.12.1 update
###################################################################
duplInheritedMember
constParameterCallback
constParameterReference
constParameterPointer
constVariableReference
constVariablePointer
useStlAlgorithm
uselessOverride
unusedScopedObject
FORCE
SOURCES
addkernels/
Expand Down
24 changes: 15 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:20.04 as miopen
FROM ubuntu:22.04 as miopen
ARG DEBIAN_FRONTEND=noninteractive

# Support multiarch
Expand All @@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg

RUN wget https://repo.radeon.com/amdgpu-install/5.7.1/ubuntu/focal/amdgpu-install_5.7.50701-1_all.deb --no-check-certificate
RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
./amdgpu-install_5.7.50701-1_all.deb
./amdgpu-install_6.0.60000-1_all.deb

# Add rocm repository
RUN export ROCM_APT_VER=5.7.1;\
RUN export ROCM_APT_VER=6.0;\
echo $ROCM_APT_VER &&\
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list'
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list"

RUN amdgpu-install -y --usecase=rocm --no-dkms

Expand Down Expand Up @@ -96,11 +96,17 @@ RUN tar zxvf /tmp/ccache.tar.gz -C /tmp/ && mkdir /tmp/ccache-${CCACHE_COMMIT}/b
cd /tmp/ccache-${CCACHE_COMMIT}/build && \
cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/*
RUN ccache -s

# purge existing composable kernel installed with ROCm
# hence cannot use autoremove since it will remove more components
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \
composablekernel-dev
ARG COMPILER_LAUNCHER=""
RUN if [ "$USE_FIN" = "ON" ]; then \
rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
else \
rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
fi

RUN ccache -s
Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ROCmSoftwarePlatform/rocm-recipes@d7b71f8ff71572833c8cf15b74279dd034e66f9d
-f requirements.txt
danmar/cppcheck@2.9
danmar/cppcheck@2.12.1
google/[email protected]
2 changes: 1 addition & 1 deletion docs/DebugAndLogging.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Direct Solutions:
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2<n>` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1`
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
[email protected] -DCMAKE_POSITION_INDEPENDENT_CODE=On
[email protected] -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations "
facebook/[email protected] -X subdir -DCMAKE_DIR=build/cmake
ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Q] Where will we get half from after this?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about `apt-get install half`? :)

Now, if we use `amdgpu-install -y --usecase=rocm` as part of the Dockerfile script, both half and composablekernel will be pre-installed. If we then also try to install them by building the dependencies, there will be errors.

Copy link
Contributor

@atamazov atamazov Nov 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@junliume I recommend keeping this line but commenting it out with `#` (and maybe adding a short explanation). It may be useful for clients who use a previous version of ROCm that does not install half by default.

ROCmSoftwarePlatform/[email protected] -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
# ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
ROCmSoftwarePlatform/[email protected] -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
nlohmann/[email protected] -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
ROCmSoftwarePlatform/[email protected]
ROCmSoftwarePlatform/[email protected]
ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50
ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
ROCmSoftwarePlatform/composable_kernel@d0f355a31a341b0a885ff65231781f332a20cc5f -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP")
endif()
if(ENABLE_HIP_WORKAROUNDS)
# Workaround hip not setting its usage requirements correctly
target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 )
target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 )
endif()
# This is helpful for the tests
target_link_libraries( MIOpen INTERFACE $<BUILD_INTERFACE:hip::device> )
Expand Down
6 changes: 4 additions & 2 deletions src/comgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1293,8 +1293,10 @@ void BuildHip(const std::string& name,
miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
compiler::lc::RemoveOptionsUnwanted(opts);
opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073
opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL
opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
#endif
opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
#if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT
if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name()))
opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1");
Expand Down
2 changes: 1 addition & 1 deletion src/composable_kernel/.clang-tidy
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CheckOptions:
- key: bugprone-reserved-identifier.AllowedIdentifiers
value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__'
value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__'
2 changes: 1 addition & 1 deletion src/composable_kernel/cmake/ClangTidy.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ function(clang_tidy_check TARGET)
add_custom_target(${tidy_target}
# for some targets clang-tidy not able to get information from .clang-tidy
DEPENDS ${SOURCE}
COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..."
)
Expand Down
10 changes: 5 additions & 5 deletions src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
extern "C" {
#endif

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
junliume marked this conversation as resolved.
Show resolved Hide resolved
#define EXECUTION_SPECIFIER __device__
#else
#define EXECUTION_SPECIFIER
Expand All @@ -43,7 +43,7 @@ typedef union

// Composable kernels are written in HIP language. The language doesnt support
// ushort2.hi or ushort2.low.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
ushort ushortvec[2];
#endif // MIOPEN_BACKEND_HIP
float f32;
Expand All @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
{
cvt_bf16_fp32_t target_val;

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.ushortx2 = make_ushort2(0, src_val);
#else
target_val.ushortx2 = (ushort2)(0, src_val);
Expand Down Expand Up @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
// When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
// incrementing it causes it to become an exponent of 0xFF and a mantissa
// of 0x00, which is Inf, the next higher value to the unrounded value.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
#else
target_val.u32 +=
Expand All @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
#endif // MIOPEN_USE_RNE_BFLOAT16
}

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
return target_val.ushortvec[1];
#else
return target_val.ushortx2.hi;
Expand Down
2 changes: 1 addition & 1 deletion src/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor&
}
}

std::size_t out_c;
std::size_t out_c = 0;
std::vector<std::size_t> out_lens(spatial_dim + 2);

auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim);
Expand Down
10 changes: 5 additions & 5 deletions src/kernels/bfloat16_dev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
extern "C" {
#endif

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define EXECUTION_SPECIFIER __device__
#else
#define EXECUTION_SPECIFIER
Expand All @@ -43,7 +43,7 @@ typedef union cvt_bf16_fp32

// Composable kernels are written in HIP language. The language doesnt support
// ushort2.hi or ushort2.low.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
ushort ushortvec[2];
#endif // MIOPEN_BACKEND_HIP
float f32;
Expand All @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
{
cvt_bf16_fp32_t target_val;

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.ushortx2 = make_ushort2(0, src_val);
#else
target_val.ushortx2 = (ushort2)(0, src_val);
Expand Down Expand Up @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
// When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
// incrementing it causes it to become an exponent of 0xFF and a mantissa
// of 0x00, which is Inf, the next higher value to the unrounded value.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
#else
target_val.u32 +=
Expand All @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
#endif // MIOPEN_USE_RNE_BFLOAT16
}

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
return target_val.ushortvec[1];
#else
return target_val.ushortx2.hi;
Expand Down
36 changes: 18 additions & 18 deletions src/kernels/float_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#define FOUR 4
#define EIGHT 8
#if MIOPEN_USE_FP8 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT hip_f8<miopen_f8::hip_f8_type::fp8>
#define FLOAT_ACCUM float
// HIP implements the correct operators for conversion
Expand All @@ -58,7 +58,7 @@
#endif // MIOPEN_USE_FP8

#if MIOPEN_USE_BFP8 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT hip_f8<miopen_f8::hip_f8_type::bf8>
#define FLOAT_ACCUM float
#else
Expand All @@ -79,7 +79,7 @@
// #endif
#endif // MIOPEN_USE_BFP8

#ifndef __HIP_PLATFORM_HCC__
#ifndef __HIP_PLATFORM_AMD__
#define _FLOAT2 PPCAT(_FLOAT, TWO)
#define _FLOAT4 PPCAT(_FLOAT, FOUR)
#define _FLOAT8 PPCAT(_FLOAT, EIGHT)
Expand All @@ -99,19 +99,19 @@
#endif

#if MIOPEN_USE_DOUBLE_ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT_ACCUM double
#else
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define _FLOAT_ACCUM double
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#define MAX_VAL_ACCUM DBL_MAX
#else // MIOPEN_USE_DOUBLE_ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT_ACCUM float
#else
#define _FLOAT_ACCUM float
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#ifndef FLT_MAX
#define MAX_VAL_ACCUM 3.402823466e+38F
#else
Expand All @@ -120,12 +120,12 @@
#endif // MIOPEN_USE_DOUBLE_ACCUM

#if MIOPEN_USE_FP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT _Float16
#else // __HIP_PLATFORM_HCC__
#else // __HIP_PLATFORM_AMD__
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define _FLOAT half
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#define SIZEOF_FLOAT 2
// Max value for the main datatype
#ifndef HALF_MAX
Expand All @@ -136,11 +136,11 @@
#endif // MIOPEN_USE_FP16

#if MIOPEN_USE_FP32 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT float
#else
#define _FLOAT float
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#define SIZEOF_FLOAT 4
// Max value for the main datatype
#ifndef FLT_MAX
Expand All @@ -151,7 +151,7 @@
#endif // MIOPEN_USE_FP32

#if MIOPEN_USE_BFP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT ushort
#else
#define _FLOAT ushort
Expand All @@ -162,7 +162,7 @@
#endif // MIOPEN_USE_BFP16

#if MIOPEN_USE_FP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
#define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
#define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
Expand All @@ -188,7 +188,7 @@
/// refactoring should be considered as nontrivial and requires
/// a separate PR. Let's keep this historical stuff for now.
/// --atamazov 30.08.2023
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
#define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
#define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
Expand All @@ -202,7 +202,7 @@
#endif // MIOPEN_USE_FP32

#if MIOPEN_USE_BFP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_FLOAT2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
#define CVT_ACCUM2FLOAT(x) MIOPEN_ERROR_NOT_IMLEMENTED
#define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
Expand Down Expand Up @@ -232,7 +232,7 @@
#endif

#if MIOPEN_USE_NATIVE_DATATYPE_ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#undef FLOAT_ACCUM
#define FLOAT_ACCUM MIOPEN_ERROR_NOT_IMLEMENTED
#else
Expand All @@ -250,7 +250,7 @@
#define CVT_FP32_2ACCUM(x) (CVT_FP32_2FLOAT(x))

#undef CVT_INTEGRAL2ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
#else
#if MIOPEN_USE_BFP16 == 1
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/hip_f8_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
// #include <half.hpp>
namespace miopen_hip_f8_impl {

#ifndef __HIP_PLATFORM_HCC__
#ifndef __HIP_PLATFORM_AMD__
using hip_bfloat16 = bfloat16;
using half = half_float::half;
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/hip_float8.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#endif

// FP8 header version 0.4, 2021/05/11
#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE
#if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE
// MIOpen by default does not have device code in the regular compilation paths,
// therefore, when this file is used from the host side, compilation takes much
// longer. By guarding the __device__ directive we can control that such compilation
Expand Down
Loading