From 22bb777f988d6a59e052e4a37e7f2eb0e99c5e10 Mon Sep 17 00:00:00 2001 From: Umang Yadav <29876643+umangyadav@users.noreply.github.com> Date: Mon, 30 Oct 2023 14:39:53 -0400 Subject: [PATCH] Remove int8x4 format completely (#2373) --- .../custom_op_miopen_kernel.cpp | 18 +- examples/migraphx/migraphx_driver/README.md | 3 - src/targets/gpu/CMakeLists.txt | 8 - src/targets/gpu/compile_miopen.cpp | 19 +- src/targets/gpu/device/int8_gemm_pack.cpp | 97 ---- src/targets/gpu/gemm_impl.cpp | 16 +- .../include/migraphx/gpu/compile_miopen.hpp | 2 +- .../gpu/include/migraphx/gpu/convolution.hpp | 40 +- .../migraphx/gpu/device/int8_gemm_pack.hpp | 49 -- src/targets/gpu/include/migraphx/gpu/gemm.hpp | 12 +- .../gpu/include/migraphx/gpu/gemm_impl.hpp | 2 - .../include/migraphx/gpu/int8_conv_pack.hpp | 52 -- .../include/migraphx/gpu/int8_gemm_pack.hpp | 63 --- .../gpu/include/migraphx/gpu/miopen.hpp | 18 +- .../include/migraphx/gpu/pack_int8_args.hpp | 46 -- .../gpu/include/migraphx/gpu/rocblas.hpp | 2 - src/targets/gpu/int8_conv_pack.cpp | 78 --- src/targets/gpu/int8_gemm_pack.cpp | 60 --- src/targets/gpu/lowering.cpp | 20 +- src/targets/gpu/pack_int8_args.cpp | 225 --------- src/targets/gpu/rocblas.cpp | 13 - src/targets/gpu/target.cpp | 2 - test/gpu/pack_int8_args.cpp | 465 ------------------ ...conv_default_mode.cpp => quant_conv_1.cpp} | 2 +- ...nv_int8x4_default.cpp => quant_conv_2.cpp} | 2 +- 25 files changed, 31 insertions(+), 1283 deletions(-) delete mode 100644 src/targets/gpu/device/int8_gemm_pack.cpp delete mode 100644 src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp delete mode 100644 src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp delete mode 100644 src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp delete mode 100644 src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp delete mode 100644 src/targets/gpu/int8_conv_pack.cpp delete mode 100644 src/targets/gpu/int8_gemm_pack.cpp delete mode 100644 src/targets/gpu/pack_int8_args.cpp delete mode 100644 test/gpu/pack_int8_args.cpp rename test/verify/{quant_conv_default_mode.cpp => quant_conv_1.cpp} (96%) rename test/verify/{quant_conv_int8x4_default.cpp => quant_conv_2.cpp} (95%) diff --git a/examples/migraphx/custom_op_miopen_kernel/custom_op_miopen_kernel.cpp b/examples/migraphx/custom_op_miopen_kernel/custom_op_miopen_kernel.cpp index db38e47e6a5..ff0022b6886 100644 --- a/examples/migraphx/custom_op_miopen_kernel/custom_op_miopen_kernel.cpp +++ b/examples/migraphx/custom_op_miopen_kernel/custom_op_miopen_kernel.cpp @@ -32,7 +32,7 @@ #define MIGRAPHX_MIOPEN_ASSERT(x) (assert((x) == miopenStatusSuccess)) #define MIGRAPHX_HIP_ASSERT(x) (assert((x) == hipSuccess)) -inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s, bool pack = false) +inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s) { miopenTensorDescriptor_t t; MIGRAPHX_MIOPEN_ASSERT(miopenCreateTensorDescriptor(&t)); @@ -49,23 +49,9 @@ inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s, boo else if(s.type() == migraphx_shape_int32_type) d = miopenInt32; else if(s.type() == migraphx_shape_int8_type) - { - if(pack) - { - // update the lens and corresponding strides - d = miopenInt8x4; - lens[1] = ((lens[1] + 3) / 4) * 4; - strides[0] = strides[1] * lens[1]; - } - else - { - d = miopenInt8; - } - } + d = miopenInt8; else - { throw("MAKE_TENSOR: unsupported type"); - } miopenSetTensorDescriptor(t, d, s_lens.size(), lens.data(), strides.data()); return t; } diff --git a/examples/migraphx/migraphx_driver/README.md b/examples/migraphx/migraphx_driver/README.md index 972137ceab7..2dc2403b2fb 100755 --- a/examples/migraphx/migraphx_driver/README.md +++ b/examples/migraphx/migraphx_driver/README.md @@ -149,9 +149,6 @@ gpu::gelu gpu::gelu_new gpu::gemm gpu::greater -gpu::int8_conv_pack -gpu::int8_gemm_pack_a -gpu::int8_gemm_pack_b gpu::layernorm gpu::leaky_relu gpu::less diff --git a/src/targets/gpu/CMakeLists.txt b/src/targets/gpu/CMakeLists.txt index 41a1c66b4d7..48b0e1e9e75 100644 --- a/src/targets/gpu/CMakeLists.txt +++ b/src/targets/gpu/CMakeLists.txt @@ -128,8 +128,6 @@ add_library(migraphx_gpu gather.cpp gemm_impl.cpp hip.cpp - int8_conv_pack.cpp - int8_gemm_pack.cpp kernel.cpp lowering.cpp logsoftmax.cpp @@ -140,7 +138,6 @@ add_library(migraphx_gpu no_device.cpp nonzero.cpp pack_args.cpp - pack_int8_args.cpp prefuse_ops.cpp pad.cpp perfdb.cpp @@ -184,7 +181,6 @@ register_migraphx_gpu_ops(hip_ register_migraphx_gpu_ops(miopen_ abs contiguous - int8_conv_pack lrn pooling ) @@ -192,10 +188,6 @@ register_op(migraphx_gpu HEADER migraphx/gpu/rnn_variable_seq_lens.hpp OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output INCLUDES migraphx/gpu/context.hpp) -register_op(migraphx_gpu - HEADER migraphx/gpu/int8_gemm_pack.hpp - OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b - INCLUDES migraphx/gpu/context.hpp) register_op(migraphx_gpu HEADER migraphx/gpu/gemm.hpp OPERATORS gpu::rocblas_gemm gpu::rocblas_gemm diff --git a/src/targets/gpu/compile_miopen.cpp b/src/targets/gpu/compile_miopen.cpp index a2405d6be6a..d1eb8118f2d 100644 --- a/src/targets/gpu/compile_miopen.cpp +++ b/src/targets/gpu/compile_miopen.cpp @@ -60,9 +60,8 @@ struct miopen_op }; MIGRAPHX_REGISTER_OP(miopen_op); -std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool format) const +std::size_t compile_miopen::compile(operation& op, instruction_ref ins) const { - op.from_value({{"int8_x4_format", format}}); auto v = op.compile(*ctx, ins->get_shape(), to_shapes(ins->inputs())); return v.get("workspace", 0); } @@ -70,25 +69,15 @@ std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool for void compile_miopen::apply(module& m) const { assert(ctx); - const bool int8_x4_format = get_int8_x4_format(any_cast(*ctx)); for(auto ins : iterator_for(m)) { if(ins->name() != "gpu::miopen_op") continue; auto op = any_cast(ins->get_operator()).op; std::size_t ws = 0; - try - { - // for the regular convolution and convolution_backwards, this try would always succeed - ws = compile(op, ins, int8_x4_format); - } - catch(migraphx::exception&) - { - // In case no solver supports the default format, retry using the other format. - ws = compile(op, ins, not int8_x4_format); - } - auto inputs = ins->inputs(); - auto alloc = m.insert_instruction( + ws = compile(op, ins); + auto inputs = ins->inputs(); + auto alloc = m.insert_instruction( ins, make_op("allocate", {{"shape", to_value(shape{shape::int8_type, {ws}})}})); inputs.insert(std::prev(inputs.end()), alloc); diff --git a/src/targets/gpu/device/int8_gemm_pack.cpp b/src/targets/gpu/device/int8_gemm_pack.cpp deleted file mode 100644 index 7b682b77ab1..00000000000 --- a/src/targets/gpu/device/int8_gemm_pack.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include - -namespace migraphx { -inline namespace MIGRAPHX_INLINE_NS { -namespace gpu { -namespace device { - -void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument& arg) -{ - auto comp_shape = arg.get_shape(); - auto out_lens = comp_shape.lens(); - auto dim_0 = out_lens.size() - 2; - auto dim_1 = out_lens.size() - 1; - std::size_t lda = comp_shape.strides()[dim_0]; - std::size_t m_size = out_lens[dim_0] * out_lens[dim_1]; - visit_all(result, arg)([&](auto output, auto input) { - std::size_t nelements = comp_shape.elements(); - auto* out_ptr = device_cast(output.data()); - auto* in_ptr = device_cast(input.data()); - visit_tensor_size(out_lens.size(), [&](auto out_dim) { - hip_tensor_descriptor desc(comp_shape); - gs_launch(stream, nelements, 256)([=](auto ii) __device__ { - const size_t nb = 4; - auto idx = desc.multi(ii); - std::size_t i_m = idx[dim_1]; - std::size_t i_k = idx[dim_0]; - std::size_t offset = ii / m_size * m_size; - out_ptr[i_k % nb + (i_m + (i_k / nb) * lda) * nb + offset] = - in_ptr[i_m + i_k * lda + offset]; - }); - }); - }); -} - -void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument& arg) -{ - auto trans_shape = arg.get_shape(); - auto out_lens = trans_shape.lens(); - auto dim_0 = trans_shape.lens().size() - 2; - auto dim_1 = trans_shape.lens().size() - 1; - std::size_t ldb = trans_shape.strides()[dim_1]; - - auto wrap_lens = out_lens; - std::swap(wrap_lens[dim_0], wrap_lens[dim_1]); - shape comp_shape{trans_shape.type(), wrap_lens}; - std::size_t m_size = out_lens[dim_0] * out_lens[dim_1]; - visit_all(result, arg)([&](auto output, auto input) { - std::size_t nelements = comp_shape.elements(); - auto* out_ptr = device_cast(output.data()); - auto* in_ptr = device_cast(input.data()); - visit_tensor_size(out_lens.size(), [&](auto out_dim) { - hip_tensor_descriptor desc(comp_shape); - gs_launch(stream, nelements, 256)([=](auto ii) __device__ { - const size_t nb = 4; - auto idx = desc.multi(ii); - std::size_t i_n = idx[dim_1]; - std::size_t i_k = idx[dim_0]; - std::size_t offset = ii / m_size * m_size; - out_ptr[i_k % nb + (i_n + (i_k / nb) * ldb) * nb + offset] = - in_ptr[i_n + i_k * ldb + offset]; - }); - }); - }); -} - -} // namespace device -} // namespace gpu -} // namespace MIGRAPHX_INLINE_NS -} // namespace migraphx diff --git a/src/targets/gpu/gemm_impl.cpp b/src/targets/gpu/gemm_impl.cpp index 1f908f6b300..f70a8f1c371 100644 --- a/src/targets/gpu/gemm_impl.cpp +++ b/src/targets/gpu/gemm_impl.cpp @@ -108,7 +108,6 @@ void gemm_impl(context& ctx, const std::vector& args, T alpha, T beta, - bool int8_x4_format, bool compute_fp32) { const bool is_3inputs = (args.size() == 4); @@ -141,11 +140,6 @@ void gemm_impl(context& ctx, } rocblas_gemm_flags flag = rocblas_gemm_flags_none; -#if ROCBLAS_VERSION_MAJOR < 3 - if(int8_x4_format) - flag = rocblas_gemm_flags_pack_int8x4; -#endif - auto a_lens = args[0].get_shape().lens(); auto b_lens = args[1].get_shape().lens(); output_shape.visit_type([&](auto as) { @@ -167,10 +161,6 @@ void gemm_impl(context& ctx, rocblas_int n = out_lens[dim_1]; rocblas_int k = args[0].get_shape().lens()[dim_1]; auto to_pointer = [&](auto&& arg) { return as.from(arg.data()); }; - if(args[0].get_shape().type() == shape::int8_type and (k % 4) != 0 and int8_x4_format) - { - MIGRAPHX_THROW("ROCBLAS_GEMM: k size of int8 type input must be mutlple of 4!"); - } auto num_matrices = std::accumulate( out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies()); @@ -256,10 +246,9 @@ void gemm(context& ctx, const std::vector& args, float alpha, float beta, - bool int8_x4_format, bool compute_fp32) { - gemm_impl(ctx, output_shape, args, alpha, beta, int8_x4_format, compute_fp32); + gemm_impl(ctx, output_shape, args, alpha, beta, compute_fp32); } void gemm(context& ctx, @@ -267,10 +256,9 @@ void gemm(context& ctx, const std::vector& args, int32_t alpha, int32_t beta, - bool int8_x4_format, bool compute_fp32) { - gemm_impl(ctx, output_shape, args, alpha, beta, int8_x4_format, compute_fp32); + gemm_impl(ctx, output_shape, args, alpha, beta, compute_fp32); } } // namespace gpu diff --git a/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp b/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp index dd81367656b..926ee94b66b 100644 --- a/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp +++ b/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp @@ -42,7 +42,7 @@ struct compile_miopen context* ctx = nullptr; std::string name() const { return "gpu::compile_miopen"; } void apply(module& m) const; - std::size_t compile(operation& op, instruction_ref ins, bool format) const; + std::size_t compile(operation& op, instruction_ref ins) const; }; } // namespace gpu diff --git a/src/targets/gpu/include/migraphx/gpu/convolution.hpp b/src/targets/gpu/include/migraphx/gpu/convolution.hpp index f88cee86855..313c726dc5b 100644 --- a/src/targets/gpu/include/migraphx/gpu/convolution.hpp +++ b/src/targets/gpu/include/migraphx/gpu/convolution.hpp @@ -57,7 +57,6 @@ template struct miopen_convolution { Op op; - bool int8_x4_format = false; shared cd = nullptr; miopenConvFwdAlgorithm_t algo{}; #ifdef MIGRAPHX_HAS_FIND_2_API @@ -74,7 +73,6 @@ struct miopen_convolution f(self.solution_object, "solution_object"), #endif f(self.algo, "algo"), - f(self.int8_x4_format, "int8_x4_format"), f(self.solution_id, "solution_id")); } @@ -94,9 +92,9 @@ struct miopen_convolution argument compute(context& ctx, const shape& output_shape, const std::vector& args) const { - auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()), int8_x4_format); - auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()), int8_x4_format); - auto y_desc = make_tensor(reshape_if_1d(output_shape)); + auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape())); + auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape())); + auto y_desc = make_tensor(reshape_if_1d(output_shape)); auto* miopen_stream_handle = ctx.get_stream().get_miopen(); auto workspace_size = args[2].get_shape().bytes(); @@ -162,8 +160,8 @@ struct miopen_convolution shape find(context& ctx, const shape& output_shape, const std::vector& inputs) { shape workspace_shape{}; - auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format); - auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format); + auto x_desc = make_tensor(reshape_if_1d(inputs[0])); + auto w_desc = make_tensor(reshape_if_1d(inputs[1])); auto y_desc = make_tensor(reshape_if_1d(output_shape)); auto* miopen_stream_handle = ctx.get_stream().get_miopen(); @@ -179,13 +177,8 @@ struct miopen_convolution workspace_shape = shape{shape::int8_type, {workspace_size}}; - auto x_shape = inputs[0]; - auto w_shape = inputs[1]; - if(int8_x4_format) - { - x_shape = pack_int8_shape(x_shape); - w_shape = pack_int8_shape(w_shape); - } + const auto& x_shape = inputs[0]; + const auto& w_shape = inputs[1]; #ifdef MIGRAPHX_HAS_FIND_2_API { @@ -327,8 +320,8 @@ struct miopen_convolution ": workspace has changed during finalization."); } - auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format); - auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format); + auto x_desc = make_tensor(reshape_if_1d(inputs[0])); + auto w_desc = make_tensor(reshape_if_1d(inputs[1])); auto y_desc = make_tensor(reshape_if_1d(output_shape)); auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(), @@ -347,21 +340,6 @@ struct miopen_convolution { return shapes.size() - 1; } - - inline shape pack_int8_shape(const shape& s) const - { - if(s.type() != shape::int8_type) - { - return s; - } - - auto lens = s.lens(); - auto strides = s.strides(); - lens[1] = (lens[1] + 3) / 4 * 4; - strides[0] = strides[1] * lens[1]; - - return {s.type(), lens, strides}; - } }; } // namespace gpu diff --git a/src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp b/src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp deleted file mode 100644 index 25635cf3f09..00000000000 --- a/src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP -#define MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP - -#include -#include -#include - -namespace migraphx { -inline namespace MIGRAPHX_INLINE_NS { -namespace gpu { -namespace device { - -void MIGRAPHX_DEVICE_EXPORT int8_gemm_pack_a(hipStream_t stream, - const argument& result, - const argument& arg); - -void MIGRAPHX_DEVICE_EXPORT int8_gemm_pack_b(hipStream_t stream, - const argument& result, - const argument& arg); - -} // namespace device -} // namespace gpu -} // namespace MIGRAPHX_INLINE_NS -} // namespace migraphx - -#endif diff --git a/src/targets/gpu/include/migraphx/gpu/gemm.hpp b/src/targets/gpu/include/migraphx/gpu/gemm.hpp index 8834645663a..0be0d394732 100644 --- a/src/targets/gpu/include/migraphx/gpu/gemm.hpp +++ b/src/targets/gpu/include/migraphx/gpu/gemm.hpp @@ -50,7 +50,6 @@ struct rocblas_gemm Op op; float alpha = 1; float beta = 0; - bool int8_x4_format = true; bool compute_fp32 = false; unsigned trans_batch = 0; @@ -60,7 +59,6 @@ struct rocblas_gemm return pack_join(migraphx::reflect(self.op, f), pack(f(self.alpha, "alpha"), f(self.beta, "beta"), - f(self.int8_x4_format, "int8_x4_format"), f(self.compute_fp32, "compute_fp32"), f(self.trans_batch, "trans_batch"))); } @@ -113,17 +111,11 @@ struct rocblas_gemm { if(this->name() == "gpu::gemm") { - gemm(ctx, output_shape, args, alpha, beta, int8_x4_format, compute_fp32); + gemm(ctx, output_shape, args, alpha, beta, compute_fp32); } else { - gemm(ctx, - output_shape, - args, - int32_t(alpha), - int32_t(beta), - int8_x4_format, - compute_fp32); + gemm(ctx, output_shape, args, int32_t(alpha), int32_t(beta), compute_fp32); } return args.back(); } diff --git a/src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp b/src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp index 198fddf98b2..c2af07ce75c 100644 --- a/src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp +++ b/src/targets/gpu/include/migraphx/gpu/gemm_impl.hpp @@ -37,14 +37,12 @@ void gemm(context& ctx, const std::vector& args, float alpha, float beta, - bool int8_x4_format, bool compute_fp32); void gemm(context& ctx, const shape& output_shape, const std::vector& args, int32_t alpha, int32_t beta, - bool int8_x4_format, bool compute_fp32); } // namespace gpu diff --git a/src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp b/src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp deleted file mode 100644 index ef63a5e5a6b..00000000000 --- a/src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP -#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP - -#include -#include -#include - -namespace migraphx { -inline namespace MIGRAPHX_INLINE_NS { -namespace gpu { - -struct context; - -struct miopen_int8_conv_pack -{ - std::string name() const { return "gpu::int8_conv_pack"; } - shape compute_shape(const std::vector& inputs) const; - argument compute(context& ctx, const shape&, const std::vector& args) const; - std::ptrdiff_t output_alias(const std::vector& shapes) const - { - return shapes.size() - 1; - } -}; - -} // namespace gpu -} // namespace MIGRAPHX_INLINE_NS -} // namespace migraphx - -#endif diff --git a/src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp b/src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp deleted file mode 100644 index b3ba0d50a35..00000000000 --- a/src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP -#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP - -#include -#include -#include - -namespace migraphx { -inline namespace MIGRAPHX_INLINE_NS { -namespace gpu { - -struct context; - -struct hip_int8_gemm_pack_a -{ - std::string name() const { return "gpu::int8_gemm_pack_a"; } - shape compute_shape(const std::vector& inputs) const; - argument compute(context& ctx, const shape&, const std::vector& args) const; - std::ptrdiff_t output_alias(const std::vector& shapes) const - { - return shapes.size() - 1; - } -}; - -struct hip_int8_gemm_pack_b -{ - std::string name() const { return "gpu::int8_gemm_pack_b"; } - shape compute_shape(const std::vector& inputs) const; - argument compute(context& ctx, const shape&, const std::vector& args) const; - std::ptrdiff_t output_alias(const std::vector& shapes) const - { - return shapes.size() - 1; - } -}; - -} // namespace gpu -} // namespace MIGRAPHX_INLINE_NS -} // namespace migraphx - -#endif diff --git a/src/targets/gpu/include/migraphx/gpu/miopen.hpp b/src/targets/gpu/include/migraphx/gpu/miopen.hpp index 6d03e69b6e6..8163579eb48 100644 --- a/src/targets/gpu/include/migraphx/gpu/miopen.hpp +++ b/src/targets/gpu/include/migraphx/gpu/miopen.hpp @@ -127,7 +127,7 @@ inline void set_tensor_descriptor(miopenTensorArgumentId_t name, } #endif -inline tensor_descriptor make_tensor(const migraphx::shape& os, bool pack = false) +inline tensor_descriptor make_tensor(const migraphx::shape& os) { auto s = os.normalize_standard(); auto t = make_obj(&miopenCreateTensorDescriptor); @@ -142,23 +142,9 @@ inline tensor_descriptor make_tensor(const migraphx::shape& os, bool pack = fals else if(s.type() == shape::int32_type) d = miopenInt32; else if(s.type() == shape::int8_type) - { - if(pack) - { - // update the lens and corresponding strides - d = miopenInt8x4; - lens[1] = ((lens[1] + 3) / 4) * 4; - strides[0] = strides[1] * lens[1]; - } - else - { - d = miopenInt8; - } - } + d = miopenInt8; else - { MIGRAPHX_THROW("MAKE_TENSOR: unsupported type"); - } miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data()); return t; diff --git a/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp b/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp deleted file mode 100644 index c8d5b5bd5e4..00000000000 --- a/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP -#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP - -#include -#include - -namespace migraphx { -inline namespace MIGRAPHX_INLINE_NS { - -namespace gpu { - -struct MIGRAPHX_GPU_EXPORT pack_int8_args -{ - std::string name() const { return "gpu::pack_int8_args"; } - void apply(module& m) const; - shape pack_int8_shape(const shape& s) const; -}; - -} // namespace gpu -} // namespace MIGRAPHX_INLINE_NS -} // namespace migraphx - -#endif diff --git a/src/targets/gpu/include/migraphx/gpu/rocblas.hpp b/src/targets/gpu/include/migraphx/gpu/rocblas.hpp index 6388fefccd8..c9775322a19 100644 --- a/src/targets/gpu/include/migraphx/gpu/rocblas.hpp +++ b/src/targets/gpu/include/migraphx/gpu/rocblas.hpp @@ -40,8 +40,6 @@ struct context; MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag(); -MIGRAPHX_GPU_EXPORT bool get_int8_x4_format(context& ctx); - } // namespace gpu } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx diff --git a/src/targets/gpu/int8_conv_pack.cpp b/src/targets/gpu/int8_conv_pack.cpp deleted file mode 100644 index 04abf6a5617..00000000000 --- a/src/targets/gpu/int8_conv_pack.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include -#include - -namespace migraphx { -inline namespace MIGRAPHX_INLINE_NS { -namespace gpu { - -shape pack_int8_shape(const shape& s) -{ - if(s.type() != shape::int8_type) - { - MIGRAPHX_THROW("PACK_INT8_ARGS: only process int8_type"); - } - - auto lens = s.lens(); - auto strides = s.strides(); - lens[1] = (lens[1] + 3) / 4 * 4; - strides[0] = strides[1] * lens[1]; - - return {s.type(), lens, strides}; -} - -shape miopen_int8_conv_pack::compute_shape(const std::vector& inputs) const -{ - check_shapes{{inputs.at(0)}, *this}.has(1).standard(); - return pack_int8_shape(inputs.at(0)); -} - -argument -miopen_int8_conv_pack::compute(context& ctx, const shape&, const std::vector& args) const -{ - auto arg_desc = make_tensor(args[0].get_shape()); - auto arg_desc_vec4 = make_tensor(args[0].get_shape(), true); - - float alpha = 1; - float beta = 0; - // pack input to vec4 format - auto status = miopenTransformTensor(ctx.get_stream().get_miopen(), - &alpha, - arg_desc.get(), - args[0].implicit(), - &beta, - arg_desc_vec4.get(), - args[1].implicit()); - if(status != miopenStatusSuccess) - { - MIGRAPHX_THROW("INT8_CONV_PACK: transform input tensor failed"); - } - - return args[1]; -} - -} // namespace gpu -} // namespace MIGRAPHX_INLINE_NS -} // namespace migraphx diff --git a/src/targets/gpu/int8_gemm_pack.cpp b/src/targets/gpu/int8_gemm_pack.cpp deleted file mode 100644 index 72d304e0d1e..00000000000 --- a/src/targets/gpu/int8_gemm_pack.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include -#include -#include - -namespace migraphx { -inline namespace MIGRAPHX_INLINE_NS { -namespace gpu { - -shape hip_int8_gemm_pack_a::compute_shape(const std::vector& inputs) const -{ - check_shapes{{inputs.at(0)}, *this}.has(1).not_broadcasted().packed(); - return inputs.at(0); -} - -argument -hip_int8_gemm_pack_a::compute(context& ctx, const shape&, const std::vector& args) const -{ - device::int8_gemm_pack_a(ctx.get_stream().get(), args[1], args[0]); - return args[1]; -} - -shape hip_int8_gemm_pack_b::compute_shape(const std::vector& inputs) const -{ - check_shapes{{inputs.at(0)}, *this}.has(1).not_broadcasted().packed(); - return inputs.at(0); -} - -argument -hip_int8_gemm_pack_b::compute(context& ctx, const shape&, const std::vector& args) const -{ - device::int8_gemm_pack_b(ctx.get_stream().get(), args[1], args[0]); - return args[1]; -} - -} // namespace gpu -} // namespace MIGRAPHX_INLINE_NS -} // namespace migraphx diff --git a/src/targets/gpu/lowering.cpp b/src/targets/gpu/lowering.cpp index d0357f06c63..ea0e29f8853 100644 --- a/src/targets/gpu/lowering.cpp +++ b/src/targets/gpu/lowering.cpp @@ -61,9 +61,8 @@ struct miopen_apply const lowering* pass = nullptr; std::unordered_map> apply_map{}; instruction_ref last{}; - bool offload_copy = false; - bool int8_x4_format = true; - bool compute_fp32 = false; + bool offload_copy = false; + bool compute_fp32 = false; context& get_context() const { @@ -84,10 +83,8 @@ struct miopen_apply assert(mod != nullptr); assert(pass != nullptr); - auto& ctx = get_context(); - int8_x4_format = get_int8_x4_format(ctx); - compute_fp32 = get_compute_fp32_flag(); - offload_copy = (mod == mpm->get_root_module()) ? pass->offload_copy : false; + compute_fp32 = get_compute_fp32_flag(); + offload_copy = (mod == mpm->get_root_module()) ? pass->offload_copy : false; add_generic_op("contiguous"); add_extend_op("argmax"); @@ -231,18 +228,15 @@ struct miopen_apply assert(refs.size() == 2); auto output = insert_allocation(ins, ins->get_shape()); refs.push_back(output); - return mod->replace_instruction( - ins, rocblas_gemm{Op{}, 1, 0, int8_x4_format, compute_fp32}, refs); + return mod->replace_instruction(ins, rocblas_gemm{Op{}, 1, 0, compute_fp32}, refs); }); } void add_convolution_op(const std::string& name) { apply_map.emplace(name, [=](instruction_ref ins) { - operation conv = make_op( - "gpu::" + name, - {{"op", ins->get_operator().to_value()}, {"int8_x4_format", int8_x4_format}}); - auto output = insert_allocation(ins, ins->get_shape()); + operation conv = make_op("gpu::" + name, {{"op", ins->get_operator().to_value()}}); + auto output = insert_allocation(ins, ins->get_shape()); return mod->replace_instruction(ins, make_op("gpu::miopen_op", {{"op", to_value(conv)}}), diff --git a/src/targets/gpu/pack_int8_args.cpp b/src/targets/gpu/pack_int8_args.cpp deleted file mode 100644 index d2b2d5ec002..00000000000 --- a/src/targets/gpu/pack_int8_args.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace migraphx { -inline namespace MIGRAPHX_INLINE_NS { -namespace gpu { - -static instruction_ref pad_ins(module& m, instruction_ref ins, int offset) -{ - auto s = ins->get_shape(); - auto lens = s.lens(); - auto k = lens[lens.size() + offset]; - auto pad_k = (k + 3) / 4 * 4; - auto pad_lens = lens; - pad_lens[lens.size() + offset] = pad_k; - auto ret_ins = ins; - if(pad_k != k) - { - std::vector pad_dims(lens.size() * 2, 0); - pad_dims[lens.size() + offset] = pad_k - k; - shape ps{s.type(), pad_lens}; - auto ins_out = - m.insert_instruction(ins, make_op("hip::allocate", {{"shape", to_value(ps)}})); - auto pad = make_op("pad", {{"pads", pad_dims}}); - ret_ins = - m.insert_instruction(std::next(ins), make_op("gpu::pad", pad.to_value()), ins, ins_out); - } - - return ret_ins; -} - -static std::vector pad_inputs(module& m, instruction_ref ins) -{ - std::vector ret_inputs; - auto inputs = ins->inputs(); - auto in0 = inputs.at(0); - auto sa = in0->get_shape(); - bool transa = sa.transposed(); - if(transa) - { - auto perm = find_permutation(sa); - auto val = in0->get_operator().to_value(); - if(val.contains("dims")) - { - int offset = static_cast(perm.back()) - static_cast(perm.size()); - auto t_in = in0->inputs().front(); - auto p_in = pad_ins(m, t_in, offset); - auto dims = val.at("dims").to_vector(); - auto r_in = - m.insert_instruction(ins, make_op("transpose", {{"permutation", dims}}), p_in); - ret_inputs.push_back(r_in); - } - else - { - shape cs{in0->get_shape().type(), in0->get_shape().lens()}; - auto con_out = - m.insert_instruction(ins, make_op("hip::allocate", {{"shape", to_value(cs)}})); - auto cin0 = m.insert_instruction(ins, make_op("gpu::contiguous"), in0, con_out); - ret_inputs.push_back(pad_ins(m, cin0, -1)); - } - } - else - { - ret_inputs.push_back(pad_ins(m, in0, -1)); - } - - auto in1 = inputs.at(1); - auto sb = in1->get_shape(); - bool transb = sb.transposed(); - if(transb) - { - auto perm = find_permutation(sb); - auto val = in1->get_operator().to_value(); - if(val.contains("dims")) - { - int offset = static_cast(perm[perm.size() - 2]) - static_cast(perm.size()); - auto t_in = in1->inputs().front(); - auto p_in = pad_ins(m, t_in, offset); - auto dims = val.at("dims").to_vector(); - auto r_in = - m.insert_instruction(ins, make_op("transpose", {{"permutation", dims}}), p_in); - ret_inputs.push_back(r_in); - } - else - { - shape cs{in1->get_shape().type(), in1->get_shape().lens()}; - auto con_out = - m.insert_instruction(ins, make_op("hip::allocate", {{"shape", to_value(cs)}})); - auto cin1 = m.insert_instruction(ins, make_op("gpu::contiguous"), in1, con_out); - ret_inputs.push_back(pad_ins(m, cin1, -2)); - } - } - else - { - ret_inputs.push_back(pad_ins(m, in1, -2)); - } - std::copy(inputs.begin() + 2, inputs.end(), std::back_inserter(ret_inputs)); - - return ret_inputs; -} - -void pack_int8_args::apply(module& m) const -{ - for(auto ins : iterator_for(m)) - { - if(ins->name() == "gpu::quant_gemm") - { - auto val = ins->get_operator().to_value(); - assert(val.contains("int8_x4_format")); - if(not val.at("int8_x4_format").to()) - { - continue; - } - auto inputs = ins->inputs(); - auto lens = inputs.at(0)->get_shape().lens(); - // gemm need the k to be multiple of 4, so need packing that dimension - auto old_inputs = inputs; - if((lens.back() % 4) != 0) - { - inputs = pad_inputs(m, ins); - } - - bool transa = inputs[0]->get_shape().transposed(); - bool transb = inputs[1]->get_shape().transposed(); - if(not transb) - { - auto packed_b = m.insert_instruction( - ins, make_op("hip::allocate", {{"shape", to_value(inputs[1]->get_shape())}})); - auto output_b = m.insert_instruction( - ins, make_op("gpu::int8_gemm_pack_a"), {inputs[1], packed_b}); - inputs[1] = output_b; - } - - if(transa) - { - auto packed_a = m.insert_instruction( - ins, make_op("hip::allocate", {{"shape", to_value(inputs[0]->get_shape())}})); - auto output_a = m.insert_instruction( - ins, make_op("gpu::int8_gemm_pack_b"), {inputs[0], packed_a}); - inputs[0] = output_a; - } - - if(inputs != old_inputs) - { - m.replace_instruction(ins, ins->get_operator(), inputs); - } - } - else if(ins->name() == "gpu::quant_convolution") - { - auto val = ins->get_operator().to_value(); - if(not val.at("int8_x4_format").to()) - { - continue; - } - - auto inputs = ins->inputs(); - auto packed_x = m.insert_instruction( - ins, - make_op("hip::allocate", - {{"shape", to_value(pack_int8_shape(inputs[0]->get_shape()))}})); - auto output_x = - m.insert_instruction(ins, make_op("gpu::int8_conv_pack"), {inputs[0], packed_x}); - instruction::replace_argument(ins, inputs[0], output_x); - - auto packed_w = m.insert_instruction( - ins, - make_op("hip::allocate", - {{"shape", to_value(pack_int8_shape(inputs[1]->get_shape()))}})); - auto output_w = - m.insert_instruction(ins, make_op("gpu::int8_conv_pack"), {inputs[1], packed_w}); - instruction::replace_argument(ins, inputs[1], output_w); - } - } -} - -shape pack_int8_args::pack_int8_shape(const shape& s) const -{ - if(s.type() != shape::int8_type) - { - MIGRAPHX_THROW("PACK_INT8_ARGS: only process int8_type"); - } - - auto lens = s.lens(); - auto strides = s.strides(); - lens[1] = (lens[1] + 3) / 4 * 4; - strides[0] = strides[1] * lens[1]; - - return {s.type(), lens, strides}; -} - -} // namespace gpu -} // namespace MIGRAPHX_INLINE_NS -} // namespace migraphx diff --git a/src/targets/gpu/rocblas.cpp b/src/targets/gpu/rocblas.cpp index 16516b915f6..0a0faa4719f 100644 --- a/src/targets/gpu/rocblas.cpp +++ b/src/targets/gpu/rocblas.cpp @@ -53,19 +53,6 @@ bool get_compute_fp32_flag() return (starts_with(device_name, "gfx9") and device_name >= "gfx908"); } -bool get_int8_x4_format(context& ctx) -{ -#if ROCBLAS_VERSION_MAJOR >= 3 - (void)(ctx); - return false; -#else - // int8x4 packed format is only available starting from rocblas-v2.38 and it is deprecated in - // v3.0 and will be removed in v4.0 - rocblas_gemm_flags flag; - rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag); - return flag == rocblas_gemm_flags_pack_int8x4; -#endif -} } // namespace gpu } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx diff --git a/src/targets/gpu/target.cpp b/src/targets/gpu/target.cpp index faefd3ab155..95455ee9ef0 100644 --- a/src/targets/gpu/target.cpp +++ b/src/targets/gpu/target.cpp @@ -63,7 +63,6 @@ #include #include #include -#include #include #include #include @@ -154,7 +153,6 @@ std::vector target::get_passes(migraphx::context& gctx, const compile_opti dead_code_elimination{}, compile_miopen{&gctx}, dead_code_elimination{}, - pack_int8_args{}, dead_code_elimination{}, fuse_ops{&ctx, options.fast_math}, dead_code_elimination{}, diff --git a/test/gpu/pack_int8_args.cpp b/test/gpu/pack_int8_args.cpp deleted file mode 100644 index c94d6b23f16..00000000000 --- a/test/gpu/pack_int8_args.cpp +++ /dev/null @@ -1,465 +0,0 @@ -/* - * The MIT License (MIT) - * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "make_precompile_op.hpp" - -// Treat some operators as compilable to enable lowering -MIGRAPHX_GPU_TEST_PRECOMPILE("add", "mul", "convert") - -void run_passes(migraphx::module& m, migraphx::gpu::context& ctx) -{ - migraphx::run_passes(m, - {migraphx::auto_contiguous{}, - migraphx::gpu::lowering{&ctx, false}, - migraphx::dead_code_elimination{}, - migraphx::replace_allocate{migraphx::gpu::gpu_allocation_model{}}, - migraphx::dead_code_elimination{}, - migraphx::gpu::pack_int8_args{}, - migraphx::dead_code_elimination{}}); -} - -TEST_CASE(quant_dot) -{ - auto create_module = [] { - migraphx::module m("test"); - migraphx::shape m1_shape{migraphx::shape::int8_type, {5, 8}}; - migraphx::shape m2_shape{migraphx::shape::int8_type, {8, 7}}; - migraphx::shape m3_shape{migraphx::shape::int32_type, {5, 7}}; - - auto l1 = m.add_parameter("a", m1_shape); - auto l2 = m.add_parameter("b", m2_shape); - auto l3 = m.add_parameter("c", m3_shape); - auto r = - migraphx::add_apply_alpha_beta(m, {l1, l2, l3}, migraphx::make_op("quant_dot"), 1, 1); - m.add_return({r}); - return m; - }; - - auto create_optimized_int8_x4 = [](bool int8_x4) { - migraphx::module m("test"); - migraphx::shape m1_shape{migraphx::shape::int8_type, {5, 8}}; - migraphx::shape m2_shape{migraphx::shape::int8_type, {8, 7}}; - migraphx::shape m3_shape{migraphx::shape::int32_type, {5, 7}}; - - auto l1 = m.add_parameter("a", m1_shape); - auto l2 = m.add_parameter("b", m2_shape); - auto l3 = m.add_parameter("c", m3_shape); - auto beta = m.add_literal(1); - auto output = m.add_parameter("test:#output_0", m3_shape); - auto gemm_alloc = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(m3_shape)}})); - - auto packa = l2; - if(int8_x4) - { - auto alloc = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(m2_shape)}})); - packa = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), l2, alloc); - } - auto gemm = m.add_instruction( - migraphx::make_op("gpu::quant_gemm", - {{"int8_x4_format", int8_x4}, - {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}), - l1, - packa, - gemm_alloc); - - auto beta_broadcast = m.add_instruction( - migraphx::make_op("multibroadcast", {{"out_lens", m3_shape.lens()}}), beta); - auto beta_alloc = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(m3_shape)}})); - auto beta_contiguous = - m.add_instruction(migraphx::make_op("gpu::contiguous"), beta_broadcast, beta_alloc); - auto mul_alloc = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(m3_shape)}})); - auto m3_beta = m.add_instruction(make_precompile_op("mul"), l3, beta_contiguous, mul_alloc); - auto gemm_add = m.add_instruction(make_precompile_op("add"), gemm, m3_beta, output); - m.add_return({gemm_add}); - - return m; - }; - - auto m1 = create_module(); - auto ctx = migraphx::gpu::context{}; - run_passes(m1, ctx); - - bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx); - auto m2 = create_optimized_int8_x4(int8_x4); - EXPECT(m1 == m2); -} - -TEST_CASE(quant_dot_trans) -{ - auto create_module = [] { - migraphx::module m("test"); - migraphx::shape s1{migraphx::shape::int8_type, {3, 2, 8, 5}}; - migraphx::shape s2{migraphx::shape::int8_type, {3, 2, 7, 8}}; - - auto l1 = m.add_parameter("a", s1); - auto tl1 = - m.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), l1); - auto l2 = m.add_parameter("b", s2); - auto tl2 = - m.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), l2); - auto r = migraphx::add_apply_alpha_beta(m, {tl1, tl2}, migraphx::make_op("quant_dot"), 3); - m.add_return({r}); - return m; - }; - - auto create_optimized_int8_x4 = [](bool int8_x4) { - migraphx::module m("test"); - migraphx::shape s1{migraphx::shape::int8_type, {3, 2, 8, 5}}; - migraphx::shape s2{migraphx::shape::int8_type, {3, 2, 7, 8}}; - migraphx::shape s3{migraphx::shape::int32_type, {3, 2, 5, 7}}; - - auto l1 = m.add_parameter("a", s1); - auto l2 = m.add_parameter("b", s2); - auto alpha = m.add_literal(3); - auto output = m.add_parameter("test:#output_0", s3); - - auto tl1 = - m.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), l1); - migraphx::shape ts1{migraphx::shape::int8_type, {3, 2, 5, 8}}; - auto alloca = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ts1)}})); - auto conta = m.add_instruction(migraphx::make_op("gpu::contiguous"), tl1, alloca); - - auto tl2 = - m.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), l2); - migraphx::shape ts2{migraphx::shape::int8_type, {3, 2, 8, 7}}; - auto allocb = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ts2)}})); - auto contb = m.add_instruction(migraphx::make_op("gpu::contiguous"), tl2, allocb); - - auto alpha_broadcast = m.add_instruction( - migraphx::make_op("multibroadcast", {{"out_lens", conta->get_shape().lens()}}), alpha); - auto alpha_alloc = m.add_instruction(migraphx::make_op( - "hip::allocate", - {{"shape", - migraphx::to_value(migraphx::shape(migraphx::shape::int32_type, {3, 2, 5, 8}))}})); - auto alpha_contiguous = - m.add_instruction(migraphx::make_op("gpu::contiguous"), alpha_broadcast, alpha_alloc); - // alpha = int32 and tl1 = int8, convert tl1 to int32 for multiplication and then convert - // back result to int8 - auto tl1_convert_alloc = m.add_instruction(migraphx::make_op( - "hip::allocate", {{"shape", migraphx::to_value(alpha_contiguous->get_shape())}})); - auto tl1_convert = - m.add_instruction(make_precompile_op(migraphx::make_op( - "convert", {{"target_type", alpha->get_shape().type()}})), - conta, - tl1_convert_alloc); - auto mul_alloc = m.add_instruction(migraphx::make_op( - "hip::allocate", {{"shape", migraphx::to_value(tl1_convert->get_shape())}})); - auto tl1_alpha_int32 = - m.add_instruction(make_precompile_op("mul"), alpha_contiguous, tl1_convert, mul_alloc); - // convert mul_res to int8 - auto tl1_alpha_int8_alloc = m.add_instruction(migraphx::make_op( - "hip::allocate", {{"shape", migraphx::to_value(conta->get_shape())}})); - auto tl1_alpha_int8 = - m.add_instruction(make_precompile_op(migraphx::make_op( - "convert", {{"target_type", conta->get_shape().type()}})), - tl1_alpha_int32, - tl1_alpha_int8_alloc); - - auto packb = contb; - if(int8_x4) - { - auto allocpb = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ts2)}})); - packb = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), contb, allocpb); - } - - auto gemm = m.add_instruction( - migraphx::make_op("gpu::quant_gemm", - {{"int8_x4_format", int8_x4}, - {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}), - tl1_alpha_int8, - packb, - output); - m.add_return({gemm}); - - return m; - }; - - auto m1 = create_module(); - auto ctx = migraphx::gpu::context{}; - run_passes(m1, ctx); - - bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx); - auto m2 = create_optimized_int8_x4(int8_x4); - - EXPECT(m1 == m2); -} - -TEST_CASE(quant_dot_pad) -{ - auto create_module = [] { - migraphx::module m("test"); - migraphx::shape s1{migraphx::shape::int8_type, {5, 6}}; - migraphx::shape s2{migraphx::shape::int8_type, {6, 7}}; - migraphx::shape s3{migraphx::shape::int32_type, {5, 7}}; - - auto l1 = m.add_parameter("a", s1); - auto l2 = m.add_parameter("b", s2); - auto l3 = m.add_parameter("c", s3); - auto r = - migraphx::add_apply_alpha_beta(m, {l1, l2, l3}, migraphx::make_op("quant_dot"), 1, 1); - m.add_return({r}); - return m; - }; - - auto create_optimized_int8_x4 = [](bool int8_x4) { - migraphx::module m("test"); - migraphx::shape s1{migraphx::shape::int8_type, {5, 6}}; - migraphx::shape ps1{migraphx::shape::int8_type, {5, 8}}; - migraphx::shape s2{migraphx::shape::int8_type, {6, 7}}; - migraphx::shape ps2{migraphx::shape::int8_type, {8, 7}}; - migraphx::shape s3{migraphx::shape::int32_type, {5, 7}}; - - auto l1 = m.add_parameter("a", s1); - auto l2 = m.add_parameter("b", s2); - auto l3 = m.add_parameter("c", s3); - auto beta = m.add_literal(1); - auto output = m.add_parameter("test:#output_0", s3); - - auto pl1 = l1; - auto packa = l2; - migraphx::instruction_ref pl2{}; - if(int8_x4) - { - auto po1 = m.insert_instruction( - l1, migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ps1)}})); - pl1 = m.add_instruction( - migraphx::make_op("gpu::pad", {{"mode", 0}, {"pads", {0, 2, 0, 0}}, {"value", 0}}), - l1, - po1); - - auto po2 = m.insert_instruction( - l2, migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ps2)}})); - pl2 = m.insert_instruction( - std::next(l2), - migraphx::make_op("gpu::pad", {{"mode", 0}, {"pads", {2, 0, 0, 0}}, {"value", 0}}), - l2, - po2); - } - - auto gemm_alloc = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(s3)}})); - - if(int8_x4) - { - auto alloc = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ps2)}})); - packa = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), pl2, alloc); - } - - auto gemm = m.add_instruction( - migraphx::make_op("gpu::quant_gemm", - {{"int8_x4_format", int8_x4}, - {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}), - pl1, - packa, - gemm_alloc); - - auto beta_broadcast = - m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s3.lens()}}), beta); - auto beta_alloc = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(s3)}})); - auto beta_contiguous = - m.add_instruction(migraphx::make_op("gpu::contiguous"), beta_broadcast, beta_alloc); - auto mul_alloc = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(s3)}})); - auto m3_beta = m.add_instruction(make_precompile_op("mul"), l3, beta_contiguous, mul_alloc); - auto gemm_add = m.add_instruction(make_precompile_op("add"), gemm, m3_beta, output); - m.add_return({gemm_add}); - return m; - }; - - auto m1 = create_module(); - auto ctx = migraphx::gpu::context{}; - run_passes(m1, ctx); - - bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx); - auto m2 = create_optimized_int8_x4(int8_x4); - - EXPECT(m1 == m2); -} - -TEST_CASE(quant_dot_trans_pad) -{ - auto create_module = [] { - migraphx::module m("test"); - migraphx::shape s1{migraphx::shape::int8_type, {3, 2, 9, 5}}; - migraphx::shape s2{migraphx::shape::int8_type, {3, 2, 7, 9}}; - - auto l1 = m.add_parameter("a", s1); - auto tl1 = - m.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), l1); - auto l2 = m.add_parameter("b", s2); - auto tl2 = - m.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), l2); - auto r = migraphx::add_apply_alpha_beta(m, {tl1, tl2}, migraphx::make_op("quant_dot"), 3); - m.add_return({r}); - return m; - }; - - auto create_optimized_int8_x4 = [](bool int8_x4) { - migraphx::module m("test"); - migraphx::shape s1{migraphx::shape::int8_type, {3, 2, 9, 5}}; - migraphx::shape ps1{migraphx::shape::int8_type, {3, 2, 5, 12}}; - migraphx::shape s2{migraphx::shape::int8_type, {3, 2, 7, 9}}; - migraphx::shape ps2{migraphx::shape::int8_type, {3, 2, 12, 7}}; - migraphx::shape s3{migraphx::shape::int32_type, {3, 2, 5, 7}}; - - auto l1 = m.add_parameter("a", s1); - auto l2 = m.add_parameter("b", s2); - auto alpha = m.add_literal(3); - auto output = m.add_parameter("test:#output_0", s3); - - auto tl1 = - m.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), l1); - migraphx::shape ts1{migraphx::shape::int8_type, {3, 2, 5, 9}}; - auto ta = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ts1)}})); - auto conta = m.add_instruction(migraphx::make_op("gpu::contiguous"), tl1, ta); - - auto tl2 = - m.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), l2); - migraphx::shape ts2{migraphx::shape::int8_type, {3, 2, 9, 7}}; - auto tb = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ts2)}})); - - migraphx::instruction_ref ptb{}; - if(int8_x4) - { - ptb = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ps2)}})); - } - auto contb = m.add_instruction(migraphx::make_op("gpu::contiguous"), tl2, tb); - auto pb = contb; - if(int8_x4) - { - pb = m.add_instruction( - migraphx::make_op("gpu::pad", {{"mode", 0}, {"pads", {0, 0, 3, 0, 0, 0, 0, 0}}}), - contb, - ptb); - } - - auto alpha_broadcast = m.add_instruction( - migraphx::make_op("multibroadcast", {{"out_lens", conta->get_shape().lens()}}), alpha); - auto alpha_alloc = m.add_instruction( - migraphx::make_op("hip::allocate", - {{"shape", - migraphx::to_value(migraphx::shape(migraphx::shape::int32_type, - conta->get_shape().lens()))}})); - auto alpha_contiguous = - m.add_instruction(migraphx::make_op("gpu::contiguous"), alpha_broadcast, alpha_alloc); - - // alpha = int32 and tl1 = int8, convert tl1 to int32 for multiplication and then convert - // back result to int8 - auto tl1_convert_alloc = m.add_instruction(migraphx::make_op( - "hip::allocate", {{"shape", migraphx::to_value(alpha_contiguous->get_shape())}})); - auto tl1_convert = - m.add_instruction(make_precompile_op(migraphx::make_op( - "convert", {{"target_type", alpha->get_shape().type()}})), - conta, - tl1_convert_alloc); - auto mul_alloc = m.add_instruction(migraphx::make_op( - "hip::allocate", {{"shape", migraphx::to_value(tl1_convert->get_shape())}})); - auto tl1_alpha_int32 = - m.add_instruction(make_precompile_op("mul"), alpha_contiguous, tl1_convert, mul_alloc); - // convert mul_res to int8 - auto tl1_alpha_int8_alloc = m.add_instruction(migraphx::make_op( - "hip::allocate", {{"shape", migraphx::to_value(conta->get_shape())}})); - - migraphx::instruction_ref pta{}; - if(int8_x4) - { - pta = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ps1)}})); - } - - auto tl1_alpha_int8 = - m.add_instruction(make_precompile_op(migraphx::make_op( - "convert", {{"target_type", conta->get_shape().type()}})), - tl1_alpha_int32, - tl1_alpha_int8_alloc); - - auto pa = tl1_alpha_int8; - if(int8_x4) - { - pa = m.add_instruction( - migraphx::make_op("gpu::pad", {{"mode", 0}, {"pads", {0, 0, 0, 3, 0, 0, 0, 0}}}), - tl1_alpha_int8, - pta); - } - - auto packb = pb; - if(int8_x4) - { - auto allocpb = m.add_instruction( - migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ps2)}})); - packb = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), pb, allocpb); - } - - auto gemm = m.add_instruction( - migraphx::make_op("gpu::quant_gemm", - {{"int8_x4_format", int8_x4}, - {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}), - pa, - packb, - output); - m.add_return({gemm}); - - return m; - }; - - auto m1 = create_module(); - auto ctx = migraphx::gpu::context{}; - run_passes(m1, ctx); - - bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx); - auto m2 = create_optimized_int8_x4(int8_x4); - - EXPECT(m1 == m2); -} - -int main(int argc, const char* argv[]) { test::run(argc, argv); } diff --git a/test/verify/quant_conv_default_mode.cpp b/test/verify/quant_conv_1.cpp similarity index 96% rename from test/verify/quant_conv_default_mode.cpp rename to test/verify/quant_conv_1.cpp index 256854d9237..928badbd7cb 100644 --- a/test/verify/quant_conv_default_mode.cpp +++ b/test/verify/quant_conv_1.cpp @@ -27,7 +27,7 @@ #include #include -struct quant_conv_default_mode : verify_program +struct quant_conv_1 : verify_program { migraphx::program create_program() const { diff --git a/test/verify/quant_conv_int8x4_default.cpp b/test/verify/quant_conv_2.cpp similarity index 95% rename from test/verify/quant_conv_int8x4_default.cpp rename to test/verify/quant_conv_2.cpp index eafbbb95613..9ae561f732b 100644 --- a/test/verify/quant_conv_int8x4_default.cpp +++ b/test/verify/quant_conv_2.cpp @@ -27,7 +27,7 @@ #include #include -struct quant_conv_int8x4_default : verify_program +struct quant_conv_2 : verify_program { migraphx::program create_program() const {