Skip to content

Commit

Permalink
Remove int8x4 format completely (#2373)
Browse files Browse the repository at this point in the history
  • Loading branch information
umangyadav authored Oct 30, 2023
1 parent 409fd18 commit 22bb777
Show file tree
Hide file tree
Showing 25 changed files with 31 additions and 1,283 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#define MIGRAPHX_MIOPEN_ASSERT(x) (assert((x) == miopenStatusSuccess))
#define MIGRAPHX_HIP_ASSERT(x) (assert((x) == hipSuccess))

inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s, bool pack = false)
inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s)
{
miopenTensorDescriptor_t t;
MIGRAPHX_MIOPEN_ASSERT(miopenCreateTensorDescriptor(&t));
Expand All @@ -49,23 +49,9 @@ inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s, boo
else if(s.type() == migraphx_shape_int32_type)
d = miopenInt32;
else if(s.type() == migraphx_shape_int8_type)
{
if(pack)
{
// update the lens and corresponding strides
d = miopenInt8x4;
lens[1] = ((lens[1] + 3) / 4) * 4;
strides[0] = strides[1] * lens[1];
}
else
{
d = miopenInt8;
}
}
d = miopenInt8;
else
{
throw("MAKE_TENSOR: unsupported type");
}
miopenSetTensorDescriptor(t, d, s_lens.size(), lens.data(), strides.data());
return t;
}
Expand Down
3 changes: 0 additions & 3 deletions examples/migraphx/migraphx_driver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,6 @@ gpu::gelu
gpu::gelu_new
gpu::gemm
gpu::greater
gpu::int8_conv_pack
gpu::int8_gemm_pack_a
gpu::int8_gemm_pack_b
gpu::layernorm
gpu::leaky_relu
gpu::less
Expand Down
8 changes: 0 additions & 8 deletions src/targets/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@ add_library(migraphx_gpu
gather.cpp
gemm_impl.cpp
hip.cpp
int8_conv_pack.cpp
int8_gemm_pack.cpp
kernel.cpp
lowering.cpp
logsoftmax.cpp
Expand All @@ -140,7 +138,6 @@ add_library(migraphx_gpu
no_device.cpp
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
prefuse_ops.cpp
pad.cpp
perfdb.cpp
Expand Down Expand Up @@ -184,18 +181,13 @@ register_migraphx_gpu_ops(hip_
register_migraphx_gpu_ops(miopen_
abs
contiguous
int8_conv_pack
lrn
pooling
)
register_op(migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
INCLUDES migraphx/gpu/context.hpp)
register_op(migraphx_gpu
HEADER migraphx/gpu/int8_gemm_pack.hpp
OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b
INCLUDES migraphx/gpu/context.hpp)
register_op(migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
Expand Down
19 changes: 4 additions & 15 deletions src/targets/gpu/compile_miopen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,35 +60,24 @@ struct miopen_op
};
MIGRAPHX_REGISTER_OP(miopen_op);

std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool format) const
std::size_t compile_miopen::compile(operation& op, instruction_ref ins) const
{
op.from_value({{"int8_x4_format", format}});
auto v = op.compile(*ctx, ins->get_shape(), to_shapes(ins->inputs()));
return v.get<std::size_t>("workspace", 0);
}

void compile_miopen::apply(module& m) const
{
assert(ctx);
const bool int8_x4_format = get_int8_x4_format(any_cast<migraphx::gpu::context>(*ctx));
for(auto ins : iterator_for(m))
{
if(ins->name() != "gpu::miopen_op")
continue;
auto op = any_cast<miopen_op>(ins->get_operator()).op;
std::size_t ws = 0;
try
{
// for the regular convolution and convolution_backwards, this try would always succeed
ws = compile(op, ins, int8_x4_format);
}
catch(migraphx::exception&)
{
// In case no solver supports the default format, retry using the other format.
ws = compile(op, ins, not int8_x4_format);
}
auto inputs = ins->inputs();
auto alloc = m.insert_instruction(
ws = compile(op, ins);
auto inputs = ins->inputs();
auto alloc = m.insert_instruction(
ins, make_op("allocate", {{"shape", to_value(shape{shape::int8_type, {ws}})}}));
inputs.insert(std::prev(inputs.end()), alloc);

Expand Down
97 changes: 0 additions & 97 deletions src/targets/gpu/device/int8_gemm_pack.cpp

This file was deleted.

16 changes: 2 additions & 14 deletions src/targets/gpu/gemm_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ void gemm_impl(context& ctx,
const std::vector<argument>& args,
T alpha,
T beta,
bool int8_x4_format,
bool compute_fp32)
{
const bool is_3inputs = (args.size() == 4);
Expand Down Expand Up @@ -141,11 +140,6 @@ void gemm_impl(context& ctx,
}

rocblas_gemm_flags flag = rocblas_gemm_flags_none;
#if ROCBLAS_VERSION_MAJOR < 3
if(int8_x4_format)
flag = rocblas_gemm_flags_pack_int8x4;
#endif

auto a_lens = args[0].get_shape().lens();
auto b_lens = args[1].get_shape().lens();
output_shape.visit_type([&](auto as) {
Expand All @@ -167,10 +161,6 @@ void gemm_impl(context& ctx,
rocblas_int n = out_lens[dim_1];
rocblas_int k = args[0].get_shape().lens()[dim_1];
auto to_pointer = [&](auto&& arg) { return as.from(arg.data()); };
if(args[0].get_shape().type() == shape::int8_type and (k % 4) != 0 and int8_x4_format)
{
MIGRAPHX_THROW("ROCBLAS_GEMM: k size of int8 type input must be mutlple of 4!");
}

auto num_matrices = std::accumulate(
out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
Expand Down Expand Up @@ -256,21 +246,19 @@ void gemm(context& ctx,
const std::vector<argument>& args,
float alpha,
float beta,
bool int8_x4_format,
bool compute_fp32)
{
gemm_impl(ctx, output_shape, args, alpha, beta, int8_x4_format, compute_fp32);
gemm_impl(ctx, output_shape, args, alpha, beta, compute_fp32);
}

void gemm(context& ctx,
const shape& output_shape,
const std::vector<argument>& args,
int32_t alpha,
int32_t beta,
bool int8_x4_format,
bool compute_fp32)
{
gemm_impl(ctx, output_shape, args, alpha, beta, int8_x4_format, compute_fp32);
gemm_impl(ctx, output_shape, args, alpha, beta, compute_fp32);
}

} // namespace gpu
Expand Down
2 changes: 1 addition & 1 deletion src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct compile_miopen
context* ctx = nullptr;
std::string name() const { return "gpu::compile_miopen"; }
void apply(module& m) const;
std::size_t compile(operation& op, instruction_ref ins, bool format) const;
std::size_t compile(operation& op, instruction_ref ins) const;
};

} // namespace gpu
Expand Down
40 changes: 9 additions & 31 deletions src/targets/gpu/include/migraphx/gpu/convolution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ template <class Op>
struct miopen_convolution
{
Op op;
bool int8_x4_format = false;
shared<convolution_descriptor> cd = nullptr;
miopenConvFwdAlgorithm_t algo{};
#ifdef MIGRAPHX_HAS_FIND_2_API
Expand All @@ -74,7 +73,6 @@ struct miopen_convolution
f(self.solution_object, "solution_object"),
#endif
f(self.algo, "algo"),
f(self.int8_x4_format, "int8_x4_format"),
f(self.solution_id, "solution_id"));
}

Expand All @@ -94,9 +92,9 @@ struct miopen_convolution
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
{
auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()), int8_x4_format);
auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()), int8_x4_format);
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()));
auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
auto workspace_size = args[2].get_shape().bytes();

Expand Down Expand Up @@ -162,8 +160,8 @@ struct miopen_convolution
shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));

auto* miopen_stream_handle = ctx.get_stream().get_miopen();
Expand All @@ -179,13 +177,8 @@ struct miopen_convolution

workspace_shape = shape{shape::int8_type, {workspace_size}};

auto x_shape = inputs[0];
auto w_shape = inputs[1];
if(int8_x4_format)
{
x_shape = pack_int8_shape(x_shape);
w_shape = pack_int8_shape(w_shape);
}
const auto& x_shape = inputs[0];
const auto& w_shape = inputs[1];

#ifdef MIGRAPHX_HAS_FIND_2_API
{
Expand Down Expand Up @@ -327,8 +320,8 @@ struct miopen_convolution
": workspace has changed during finalization.");
}

auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
auto y_desc = make_tensor(reshape_if_1d(output_shape));

auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
Expand All @@ -347,21 +340,6 @@ struct miopen_convolution
{
return shapes.size() - 1;
}

inline shape pack_int8_shape(const shape& s) const
{
if(s.type() != shape::int8_type)
{
return s;
}

auto lens = s.lens();
auto strides = s.strides();
lens[1] = (lens[1] + 3) / 4 * 4;
strides[0] = strides[1] * lens[1];

return {s.type(), lens, strides};
}
};

} // namespace gpu
Expand Down
Loading

0 comments on commit 22bb777

Please sign in to comment.