diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 340b8e607e3..bf0211e8e26 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -227,6 +227,7 @@ jobs: - name: Free space uses: jlumbroso/free-disk-space@main + continue-on-error: true with: tool-cache: true android: true @@ -305,6 +306,7 @@ jobs: steps: - name: Free space uses: jlumbroso/free-disk-space@main + continue-on-error: true with: tool-cache: true android: true @@ -335,6 +337,7 @@ jobs: steps: - name: Free space uses: jlumbroso/free-disk-space@main + continue-on-error: true with: tool-cache: true android: true @@ -375,6 +378,7 @@ jobs: steps: - name: Free space uses: jlumbroso/free-disk-space@main + continue-on-error: true with: tool-cache: true android: true @@ -493,6 +497,7 @@ jobs: steps: - name: Free space uses: jlumbroso/free-disk-space@main + continue-on-error: true with: tool-cache: true android: true diff --git a/.gitignore b/.gitignore index 92697fb2dd2..57ae0b01788 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,6 @@ docs/html cmake-build*/ build*/ +# Recommended location to install rbuild dependencies from README.md +depend + diff --git a/rbuild.ini b/rbuild.ini index 4c0973f265f..3eb2fef6247 100755 --- a/rbuild.ini +++ b/rbuild.ini @@ -29,3 +29,12 @@ define = CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache MIGRAPHX_ENABLE_CPU=On BUILD_DEV=On + +[cibuild] +cxx = ${rocm_path}/llvm/bin/clang++ +cc = ${rocm_path}/llvm/bin/clang +deps = + -f dev-requirements.txt +define = + CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache + CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f22d89aa04f..a00242f4de8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -197,6 +197,7 @@ register_migraphx_ops( reduce_sum relu reshape + reshape_lazy reverse rnn rnn_last_cell_output diff --git a/src/auto_contiguous.cpp b/src/auto_contiguous.cpp index 39ccc4c184e..8ccb8739776 100644 --- a/src/auto_contiguous.cpp +++ b/src/auto_contiguous.cpp @@ -25,7 +25,6 @@ #include #include #include - #include namespace migraphx { diff --git a/src/include/migraphx/instruction.hpp b/src/include/migraphx/instruction.hpp index d9fe7794aab..377804e7b75 100644 --- a/src/include/migraphx/instruction.hpp +++ b/src/include/migraphx/instruction.hpp @@ -81,6 +81,7 @@ struct MIGRAPHX_EXPORT instruction const std::vector& module_inputs() const; + /// Where this instruction is used as an input to another instruction const std::vector& outputs() const; friend bool operator==(const instruction& x, const instruction& y); diff --git a/src/include/migraphx/op/reshape.hpp b/src/include/migraphx/op/reshape.hpp index 2ec0bcec9f1..90843c3ecb1 100644 --- a/src/include/migraphx/op/reshape.hpp +++ b/src/include/migraphx/op/reshape.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -29,7 +29,8 @@ #include #include #include -#include + +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -45,8 +46,6 @@ struct reshape return pack(f(self.dims, "dims")); } - value attributes() const { return {{"require_std_shape", true}}; } - std::string name() const { return "reshape"; } shape dyn_compute_shape(shape s0) const @@ -110,27 +109,9 @@ struct reshape return it; } - template - static auto can_strides_merge(DimIterator dim_start, - DimIterator dim_last, - StrideIterator stride_start, - StrideIterator stride_last) - { - assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last)); - auto cstride = *std::prev(stride_last); - return std::equal(std::make_reverse_iterator(dim_last), - std::make_reverse_iterator(dim_start + 1), - std::make_reverse_iterator(stride_last - 1), - std::make_reverse_iterator(stride_start), - [&](auto dim, auto stride) { - cstride *= dim; - return stride == cstride; - }); - } - - // This will reshape the dimesions of the input shape to use the lens of - `rdims`. If this can't be done without changing memory layout then it - will return nullopt + // This will reshape the dimensions of the input shape to the lens of + // `rdims`. Unlike reshape_lazy, this operator may change the memory layout by + // copying, so inputs that the aliasing logic would reject with nullopt are + // still handled here static optional reshape_dims(const shape& input, const std::vector& rdims) { if(input.standard()) @@ -155,13 +136,8 @@ struct reshape { auto start = idims.begin() + i; auto it = compute_end_dim(start, idims.end(), rdim); - if(it == start) - return nullopt; auto n = it - start; assert((i + n) <= istrides.size()); - if(not can_strides_merge( - start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1)) - return nullopt; i += n; rstrides.push_back(istrides[i]); } @@ -170,8 +146,7 @@ struct reshape { auto start = rdims.begin() + i; auto it = compute_end_dim(start, rdims.end(), idim); - if(it == start) - return nullopt; + auto n = it - start; assert((r + n) <= rdims.size()); auto stride = istrides[i] * idim; @@ -191,15 +166,11 @@ struct reshape auto stride = rstrides.back(); for(auto d : range(rdims.begin() + rstrides.size(), rdims.end())) { - if(d != 1) - return nullopt; + (void)d; rstrides.push_back(stride); } } - if(rdims.size() != rstrides.size()) - return nullopt; - return shape{input.type(), rdims, rstrides}; } @@ -233,25 +204,24 @@ struct reshape } auto s = reshape_dims(inputs.front(), rdims); - if(not s.has_value()) - MIGRAPHX_THROW("Reshape on axis that is not packed."); if(s->elements() != inputs.front().elements()) - MIGRAPHX_THROW("Reshape: Wrong number of elements for reshape: reshape has " + + MIGRAPHX_THROW("reshape: Wrong number of elements for reshape: reshape has " + std::to_string(s->elements()) + " elements whereas the input has " + std::to_string(inputs.front().elements())); - assert(s->bytes() == inputs.front().bytes()); return *s; } shape compute_shape(std::vector inputs) const { check_shapes{inputs, *this, true}.has(1); + auto n_neg_dims = std::count(dims.begin(), dims.end(), -1); if(n_neg_dims > 1) - MIGRAPHX_THROW("Reshape: Dimensions for reshape can only have one -1 dim"); - auto s0 = inputs[0]; + MIGRAPHX_THROW("reshape: Dimensions for reshape can only have one -1 dim"); + + auto s0 = inputs.front(); if(s0.dynamic()) { return 
dyn_compute_shape(s0); @@ -264,10 +234,14 @@ struct reshape argument compute(const dyn_output& dyn_out, std::vector args) const { - return args[0].reshape(dyn_out.computed_shape); - } + assert(dyn_out.computed_shape.standard()); + argument result{dyn_out.computed_shape}; - std::ptrdiff_t output_alias(const std::vector&) const { return 0; } + visit_all(result, args[0])([&](auto output, auto input) { + std::copy(input.begin(), input.end(), output.begin()); + }); + return result; + } }; } // namespace op diff --git a/src/include/migraphx/op/reshape_lazy.hpp b/src/include/migraphx/op/reshape_lazy.hpp new file mode 100644 index 00000000000..7263e8006d4 --- /dev/null +++ b/src/include/migraphx/op/reshape_lazy.hpp @@ -0,0 +1,279 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_OPERATORS_RESHAPE_LAZY_HPP +#define MIGRAPHX_GUARD_OPERATORS_RESHAPE_LAZY_HPP + +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace op { + +struct reshape_lazy +{ + std::vector dims; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.dims, "dims")); + } + + value attributes() const { return {{"require_std_shape", true}}; } + + std::string name() const { return "reshape_lazy"; } + + shape dyn_compute_shape(shape s0) const + { + auto dyn_dims = s0.dyn_dims(); + auto num_not_fixed = std::count_if( + dyn_dims.cbegin(), dyn_dims.cend(), [](auto dd) { return not dd.is_fixed(); }); + if(num_not_fixed != 1) + { + MIGRAPHX_THROW("reshape_lazy: Only supports one non-fixed dynamic_dimension"); + } + // track number of fixed elements in input and output + std::size_t num_dims_ele = 1; + std::size_t num_dd_ele = 1; + for(std::size_t i = 0; i < dyn_dims.size(); ++i) + { + if(dyn_dims[i].is_fixed()) + { + num_dims_ele *= dims[i]; + num_dd_ele *= dyn_dims[i].min; + } + else + { + if(dims[i] != 0 and dims[i] != -1) + { + MIGRAPHX_THROW( + "reshape_lazy: Non-fixed dynamic_dimension doesn't match with 0 or -1 " + "output dimension"); + } + } + } + if(num_dims_ele != num_dd_ele) + { + MIGRAPHX_THROW("reshape_lazy: Number of fixed elements must match. 
Input: " + + std::to_string(num_dd_ele) + " Output: " + std::to_string(num_dims_ele)); + } + // construct output dynamic shape from dims attribute + std::vector output_dyn_dims(dims.size()); + std::transform(dims.cbegin(), + dims.cend(), + dyn_dims.cbegin(), + output_dyn_dims.begin(), + [](std::size_t dim, auto dyn_dim) { + if(not dyn_dim.is_fixed()) + return dyn_dim; + return shape::dynamic_dimension{dim, dim}; + }); + return {s0.type(), output_dyn_dims}; + } + + template + static auto compute_end_dim(Iterator start, Iterator last, std::size_t dim) + { + std::size_t x = 1; + auto it = std::find_if(start, last, [&](auto i) { + x *= i; + return x >= dim; + }); + if(x != dim) + return start; + return it; + } + + template + static auto can_strides_merge(DimIterator dim_start, + DimIterator dim_last, + StrideIterator stride_start, + StrideIterator stride_last) + { + assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last)); + auto cstride = *std::prev(stride_last); + return std::equal(std::make_reverse_iterator(dim_last), + std::make_reverse_iterator(dim_start + 1), + std::make_reverse_iterator(stride_last - 1), + std::make_reverse_iterator(stride_start), + [&](auto dim, auto stride) { + cstride *= dim; + return stride == cstride; + }); + } + + // This will attempt to alias the dimensions of the input shape to the lens of + // `rdims`. If this can't be done without changing memory layout then it + // will return nullopt + static optional reshape_lazy_dims(const shape& input, + const std::vector& rdims) + { + if(input.standard()) + return shape{input.type(), rdims}; + + const auto& idims = input.lens(); + const auto& istrides = input.strides(); + + std::vector rstrides; + std::size_t i = 0; + std::size_t r = 0; + while(i < idims.size() and r < rdims.size()) + { + auto idim = idims[i]; + auto rdim = rdims[r]; + if(rdim == idim) + { + rstrides.push_back(istrides[i]); + } + // squeeze + else if(rdim > idim) + { + auto start = idims.begin() + i; + auto it = compute_end_dim(start, idims.end(), rdim); + if(it == start) + return nullopt; + auto n = it - start; + assert((i + n) <= istrides.size()); + if(not can_strides_merge( + start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1)) + return nullopt; + i += n; + rstrides.push_back(istrides[i]); + } + // unsqueeze + else // if(rdim < idim) + { + auto start = rdims.begin() + i; + auto it = compute_end_dim(start, rdims.end(), idim); + if(it == start) + return nullopt; + auto n = it - start; + assert((r + n) <= rdims.size()); + auto stride = istrides[i] * idim; + std::for_each(start, it + 1, [&](auto dim) { + stride /= dim; + rstrides.push_back(stride); + }); + r += n; + } + i++; + r++; + } + + // Handle trailing 1s + if(rstrides.size() < rdims.size() and not rstrides.empty()) + { + auto stride = rstrides.back(); + for(auto d : range(rdims.begin() + rstrides.size(), rdims.end())) + { + if(d != 1) + return nullopt; + rstrides.push_back(stride); + } + } + + if(rdims.size() != rstrides.size()) + return nullopt; + + return shape{input.type(), rdims, rstrides}; + } + + shape static_compute_shape(std::vector inputs, std::size_t n_neg_dims) const + { + check_shapes{inputs, *this}.has(1); + auto&& idims = inputs.front().lens(); + std::vector rdims(dims.begin(), dims.end()); + + for(std::size_t i = 0; i < dims.size(); i++) + { + if(dims[i] == 0) + rdims[i] = idims[i]; + + // since rdims using size_t type, -1 is the max value + // is size_t that cause later compuation incorrect + if(dims[i] == -1) + rdims[i] = 1; + } + + 
if(n_neg_dims > 0) + { + size_t missing_dim = + inputs.front().elements() / + std::accumulate(rdims.begin(), rdims.end(), 1, std::multiplies()); + for(std::size_t i = 0; i < rdims.size(); i++) + { + if(dims[i] == -1) + rdims[i] = missing_dim; + } + } + + auto s = reshape_lazy_dims(inputs.front(), rdims); + if(not s.has_value()) + MIGRAPHX_THROW("reshape_lazy on axis that is not packed."); + + if(s->elements() != inputs.front().elements()) + MIGRAPHX_THROW( + "reshape_lazy: Wrong number of elements for reshape_lazy: reshape_lazy has " + + std::to_string(s->elements()) + " elements whereas the input has " + + std::to_string(inputs.front().elements())); + + assert(s->bytes() == inputs.front().bytes()); + return *s; + } + + shape compute_shape(std::vector inputs) const + { + check_shapes{inputs, *this, true}.has(1); + auto n_neg_dims = std::count(dims.begin(), dims.end(), -1); + if(n_neg_dims > 1) + MIGRAPHX_THROW("reshape_lazy: Dimensions for reshape_lazy can only have one -1 dim"); + auto s0 = inputs[0]; + if(s0.dynamic()) + { + return dyn_compute_shape(s0); + } + else + { + return static_compute_shape(inputs, n_neg_dims); + } + } + + argument compute(const dyn_output& dyn_out, std::vector args) const + { + return args[0].reshape(dyn_out.computed_shape); + } + + std::ptrdiff_t output_alias(const std::vector&) const { return 0; } +}; + +} // namespace op +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/src/rewrite_pooling.cpp b/src/rewrite_pooling.cpp index d4b2fa8a2f4..b381a8a73f9 100644 --- a/src/rewrite_pooling.cpp +++ b/src/rewrite_pooling.cpp @@ -43,9 +43,7 @@ void rewrite_pooling::apply(module& m) const continue; if(ins->inputs().empty()) continue; - auto&& s = ins->inputs().front()->get_shape(); - if(not s.standard()) - continue; + auto&& s = ins->inputs().front()->get_shape(); auto&& op = any_cast(ins->get_operator()); if(not std::all_of(op.padding.begin(), op.padding.end(), [](auto i) { return i == 0; })) continue; @@ -54,27 +52,18 @@ void rewrite_pooling::apply(module& m) const auto lens = s.lens(); if(not std::equal(lens.begin() + 2, lens.end(), op.lengths.begin(), op.lengths.end())) continue; - std::int64_t n = s.lens()[0]; - std::int64_t c = s.lens()[1]; - auto reshape = m.insert_instruction( - ins, make_op("reshape", {{"dims", {n * c, -1}}}), ins->inputs().front()); - instruction_ref pooling{}; - + std::vector axes(lens.size() - 2); + std::iota(axes.begin(), axes.end(), 2); // average pooling if(op.mode == op::pooling_mode::average) { - pooling = m.insert_instruction(ins, make_op("reduce_mean", {{"axes", {1}}}), reshape); + m.replace_instruction(ins, make_op("reduce_mean", {{"axes", axes}}), ins->inputs()); } // max pooling else { - pooling = m.insert_instruction(ins, make_op("reduce_max", {{"axes", {1}}}), reshape); + m.replace_instruction(ins, make_op("reduce_max", {{"axes", axes}}), ins->inputs()); } - - std::vector rsp_lens(lens.size(), 1); - rsp_lens[0] = n; - rsp_lens[1] = c; - m.replace_instruction(ins, make_op("reshape", {{"dims", rsp_lens}}), pooling); } } diff --git a/src/simplify_reshapes.cpp b/src/simplify_reshapes.cpp index 08ea498e720..cd27b1157f2 100644 --- a/src/simplify_reshapes.cpp +++ b/src/simplify_reshapes.cpp @@ -122,6 +122,11 @@ struct find_nop_reshapes reshapes.insert("pad"); reshapes.insert("slice"); reshapes.insert("transpose"); + reshapes.insert("reduce_mean"); + reshapes.insert("reduce_max"); + reshapes.insert("reduce_min"); + reshapes.insert("reduce_sum"); + reshapes.insert("reduce_prod"); return 
match::name(reshapes)(match::same_shape(match::arg(0))); } diff --git a/src/targets/gpu/fuse_mlir.cpp b/src/targets/gpu/fuse_mlir.cpp index f2f2ccc8015..e40b31ddd1d 100644 --- a/src/targets/gpu/fuse_mlir.cpp +++ b/src/targets/gpu/fuse_mlir.cpp @@ -327,12 +327,12 @@ struct find_mlir_standalone_op struct find_mlir_standalone_convolution_op : find_mlir_standalone_op { - auto matcher() const { return match::name("convolution"); } + auto matcher() const { return is_mlir_conv; } }; struct find_mlir_standalone_dot_op : find_mlir_standalone_op { - auto matcher() const { return match::name("dot"); } + auto matcher() const { return match::any_of(match::name("dot"), match::name("quant_dot")); } }; /** @@ -365,7 +365,7 @@ bool is_enabled(std::string_view op_name, context* ctx) { return true; } - else if(op_name == "convolution") + else if(op_name == "convolution" or op_name == "quant_convolution") { if(ctx == nullptr) { diff --git a/src/targets/gpu/fuse_ops.cpp b/src/targets/gpu/fuse_ops.cpp index d0155fa7707..21ea9498c7c 100644 --- a/src/targets/gpu/fuse_ops.cpp +++ b/src/targets/gpu/fuse_ops.cpp @@ -790,22 +790,26 @@ struct find_layernorm_pointwise { auto matcher() const { - return precompile_name("pointwise")(match::arg(0)( + return precompile_name("pointwise")(match::any_of[match::inputs()]( precompile_name("gpu::prelayernorm", "gpu::preadd_layernorm").bind("layernorm"))); } void apply(module& m, const match::matcher_result& r) const { - auto ins = r.result; + auto pw_ins = r.result; auto layernorm = r.instructions["layernorm"]; if(not layernorm->module_inputs().empty()) return; - auto* pm = ins->module_inputs().front(); + auto* pm = pw_ins->module_inputs().front(); + auto pw_inputs = pw_ins->inputs(); + auto ln_pos = std::find(pw_inputs.begin(), pw_inputs.end(), layernorm); + assert(ln_pos != pw_inputs.end()); + pw_inputs.erase(ln_pos); auto inputs = layernorm->inputs(); inputs.pop_back(); - inputs.insert(inputs.end(), ins->inputs().begin() + 1, ins->inputs().end()); + inputs.insert(inputs.end(), pw_inputs.begin(), pw_inputs.end()); - m.replace_instruction(ins, layernorm->get_operator(), inputs, {pm}); + m.replace_instruction(pw_ins, layernorm->get_operator(), inputs, {pm}); } }; diff --git a/src/targets/gpu/lowering.cpp b/src/targets/gpu/lowering.cpp index 5a35464ec76..d0357f06c63 100644 --- a/src/targets/gpu/lowering.cpp +++ b/src/targets/gpu/lowering.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -89,7 +90,6 @@ struct miopen_apply offload_copy = (mod == mpm->get_root_module()) ? pass->offload_copy : false; add_generic_op("contiguous"); - add_extend_op("argmax"); add_extend_op("argmin"); add_extend_op("logsoftmax"); @@ -115,6 +115,7 @@ struct miopen_apply add_neg_op(); add_nms_op(); add_select_module_op(); + add_reshape_lazy_op(); } void copy_params() const @@ -376,6 +377,32 @@ struct miopen_apply return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs()); }); } + + /** + * Adds reshape lazy to reshape ops that can be aliased instead of copied. 
+ * `gpu::contiguous` are added before and after the reshape; these contiguous + * instructions can be removed by the eliminate_contiguous pass. + */ + void add_reshape_lazy_op() + { + apply_map.emplace("reshape", [=](instruction_ref ins) { + std::vector before_contiguous_args = ins->inputs(); + auto before_alloc = insert_allocation(ins, std::prev(ins)->get_shape()); + before_contiguous_args.push_back(before_alloc); + auto before_contig = + mod->insert_instruction(ins, make_op("gpu::contiguous"), {before_contiguous_args}); + + auto new_lazy_reshape = mod->insert_instruction( + ins, + make_op("reshape_lazy", {{"dims", {ins->get_operator().to_value().at("dims")}}}), + before_contig); + + std::vector after_contiguous_args = {new_lazy_reshape}; + auto after_alloc = insert_allocation(new_lazy_reshape, new_lazy_reshape->get_shape()); + after_contiguous_args.push_back(after_alloc); + return mod->replace_instruction(ins, make_op("gpu::contiguous"), after_contiguous_args); + }); + } }; void lowering::apply(module_pass_manager& mpm) const diff --git a/src/targets/gpu/mlir.cpp b/src/targets/gpu/mlir.cpp index 94b3761d0b6..347f93333ea 100644 --- a/src/targets/gpu/mlir.cpp +++ b/src/targets/gpu/mlir.cpp @@ -24,6 +24,7 @@ #include "migraphx/make_op.hpp" #include #include +#include #ifdef MIGRAPHX_MLIR #include @@ -34,6 +35,7 @@ #include #include #include +#include #include #if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3 #warning "Incompatible version of rocMLIR library used, disabling" @@ -180,13 +182,85 @@ std::string mlir_print(F f, T x) return ss.str(); } +struct mlir_logger +{ + std::stringstream ss; + mlir_context* ctx; + std::optional id; + + mlir_logger() : ctx(nullptr), id(std::nullopt) {} + + mlir_logger(mlir_context* context) : ctx(context) + { + id = + mlirContextAttachDiagnosticHandler(ctx->get(), mlir_diagnostic_print_cb, this, nullptr); + } + + ~mlir_logger() + { + if(id.has_value()) + mlirContextDetachDiagnosticHandler(ctx->get(), *id); + } + + mlir_logger(const mlir_logger& other) = delete; + mlir_logger& operator=(const mlir_logger& other) = delete; + + mlir_logger(mlir_logger&& other) noexcept + : ss(std::move(other.ss)), ctx(other.ctx), id(other.id) + { + other.ctx = nullptr; + other.id = std::nullopt; + } + + mlir_logger& operator=(mlir_logger other) noexcept + { + std::swap(ss, other.ss); + std::swap(ctx, other.ctx); + std::swap(id, other.id); + return *this; + } + + std::string str() const { return ss.str(); } + + void clear() { ss = std::stringstream{}; } + + static MlirLogicalResult mlir_diagnostic_print_cb(MlirDiagnostic diag, void* logger); + + MlirLogicalResult handle(MlirDiagnostic diag); +}; + +MlirLogicalResult mlir_logger::mlir_diagnostic_print_cb(MlirDiagnostic diag, void* logger) +{ + return reinterpret_cast(logger)->handle(diag); +} + +MlirLogicalResult mlir_logger::handle(MlirDiagnostic diag) +{ + MlirDiagnosticSeverity sev = mlirDiagnosticGetSeverity(diag); + switch(sev) + { + case MlirDiagnosticSeverity::MlirDiagnosticError: ss << "Error: "; break; + case MlirDiagnosticSeverity::MlirDiagnosticWarning: ss << "Warning: "; break; + case MlirDiagnosticSeverity::MlirDiagnosticNote: ss << "Note: "; break; + case MlirDiagnosticSeverity::MlirDiagnosticRemark: ss << "Remark: "; break; + } + mlir_print(mlirDiagnosticPrint, diag, [&](auto s) { ss << s; }); + ss << std::endl; + for(intptr_t i = 0, e = mlirDiagnosticGetNumNotes(diag); i < e; ++i) + { + (void)handle(mlirDiagnosticGetNote(diag, i)); + } + return mlirLogicalResultSuccess(); 
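+ // Returning success marks the diagnostic as fully handled, so it is not
+ // forwarded to the default handler; the text buffered in `ss` is only
+ // surfaced later, when one of the pass-manager runs below fails and
+ // rethrows it through MIGRAPHX_THROW.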
+} + struct mlir_program { mlir_program() : ctx(mlirContextCreateWithRegistry(get_dialect_registry().get(), /*threadingEnable=*/false)), location(mlirLocationUnknownGet(ctx.get())), - mmodule(mlirModuleCreateEmpty(location)) + mmodule(mlirModuleCreateEmpty(location)), + logger(&ctx) { mlirContextSetThreadPool(ctx.get(), get_thread_pool().get()); mlirContextLoadAllAvailableDialects(ctx.get()); @@ -614,21 +688,49 @@ struct mlir_program } } - void run_high_level_pipeline() MIGRAPHX_TIDY_CONST + void run_high_level_pipeline() { mlir_pass_manager pm_front{mlirPassManagerCreate(ctx.get())}; mlirMIGraphXAddHighLevelPipeline(pm_front.get()); - mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get())); + logger.clear(); + if(mlirLogicalResultIsFailure( + mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get())))) + { + std::string error = "Invalid MLIR created: " + logger.str(); + if(enabled(MIGRAPHX_TRACE_MLIR{})) + { + std::cout << error << std::endl; + } + MIGRAPHX_THROW(error); + } } - void run_backend_pipeline() MIGRAPHX_TIDY_CONST + void run_backend_pipeline() { mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())}; mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str()); - mlirPassManagerRunOnOp(pm_back.get(), mlirModuleGetOperation(mmodule.get())); + logger.clear(); + const size_t trace = value_of(MIGRAPHX_TRACE_MLIR{}); + static std::mutex mutex; + auto mod_op = mlirModuleGetOperation(mmodule.get()); + if(trace >= 2) + { + const std::lock_guard lock(mutex); + std::cout << mlir_print(&mlirOperationPrint, mod_op) << std::endl; + } + + if(mlirLogicalResultIsFailure(mlirPassManagerRunOnOp(pm_back.get(), mod_op))) + { + std::string error = "MLIR backend compilation failed: " + logger.str(); + if(enabled(MIGRAPHX_TRACE_MLIR{})) + { + std::cout << error << std::endl; + } + MIGRAPHX_THROW(error); + } } - code_object_op compile(const value& solution) MIGRAPHX_TIDY_CONST + code_object_op compile(const value& solution) { // 1st pipeline to call run_high_level_pipeline(); @@ -682,7 +784,7 @@ struct mlir_program MIGRAPHX_THROW("Failed setting tuning key: " + *str); } - tuning_config get_tuning_config(bool exhaustive) MIGRAPHX_TIDY_CONST + tuning_config get_tuning_config(bool exhaustive) { tuning_config tc; run_high_level_pipeline(); @@ -702,7 +804,8 @@ struct mlir_program if(perf_key_bytes > perf_key.size()) MIGRAPHX_THROW("Tuning perf key was " + std::to_string(perf_key_bytes) + " bytes and thus too long"); - tc.solutions.emplace_back(perf_key.begin(), perf_key.begin() + perf_key_bytes); + tc.solutions.emplace_back( + std::string(perf_key.begin(), perf_key.begin() + perf_key_bytes)); } std::array tuning_key; size_t tuning_key_bytes = @@ -809,6 +912,7 @@ struct mlir_program mlir_context ctx; MlirLocation location; mlir_module mmodule; + mlir_logger logger; problem_params pp; std::deque strings{}; std::string target_arch = ""; diff --git a/test/auto_contiguous_test.cpp b/test/auto_contiguous_test.cpp index a70e2ec1d0e..b3f401dd9d1 100644 --- a/test/auto_contiguous_test.cpp +++ b/test/auto_contiguous_test.cpp @@ -158,6 +158,31 @@ TEST_CASE(two_transpose_gather) EXPECT(m1 == m2); } +TEST_CASE(standard_reshape_lazy) +{ + migraphx::module m1; + { + auto data = m1.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}}); + auto add = m1.add_instruction(migraphx::make_op("add"), data, data); + auto r = + m1.add_instruction(migraphx::make_op("reshape_lazy", {{"dims", {2, 1, 12, 5}}}), add); + m1.add_return({r}); + } + run_pass(m1); + + 
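+ // The pass is expected to insert a `contiguous` in front of the
+ // `reshape_lazy` (m2 below): reshape_lazy still carries the
+ // require_std_shape attribute, whereas plain `reshape` (next test) no
+ // longer does and is left alone.
+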
migraphx::module m2; + { + auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}}); + auto add = m2.add_instruction(migraphx::make_op("add"), data, data); + auto ca = m2.add_instruction(migraphx::make_op("contiguous"), add); + auto r = + m2.add_instruction(migraphx::make_op("reshape_lazy", {{"dims", {2, 1, 12, 5}}}), ca); + m2.add_return({r}); + } + + EXPECT(m1 == m2); +} + TEST_CASE(standard_reshape) { migraphx::module m1; @@ -173,8 +198,7 @@ TEST_CASE(standard_reshape) { auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}}); auto add = m2.add_instruction(migraphx::make_op("add"), data, data); - auto ca = m2.add_instruction(migraphx::make_op("contiguous"), add); - auto r = m2.add_instruction(migraphx::make_op("reshape", {{"dims", {2, 1, 12, 5}}}), ca); + auto r = m2.add_instruction(migraphx::make_op("reshape", {{"dims", {2, 1, 12, 5}}}), add); m2.add_return({r}); } diff --git a/test/gpu/fuse_ops.cpp b/test/gpu/fuse_ops.cpp new file mode 100644 index 00000000000..42b3db25526 --- /dev/null +++ b/test/gpu/fuse_ops.cpp @@ -0,0 +1,107 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "make_precompile_op.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void run_pass(migraphx::program& p) +{ + migraphx::run_passes(p, {migraphx::gpu::fuse_ops{}, migraphx::dead_code_elimination{}}); +} + +TEST_CASE(layernorm_pointwise) +{ + migraphx::shape s{migraphx::shape::float_type, {2, 3, 4}}; + auto create_program = [=](bool first_arg_layernorm) { + migraphx::program p; + auto* mm = p.get_main_module(); + auto x = mm->add_parameter("x", s); + auto y = mm->add_parameter("y", s); + auto z = mm->add_parameter("z", s); + auto alloc = migraphx::make_op("allocate", {{"shape", to_value(s)}}); + auto alloc_ins = mm->add_instruction(alloc); + auto* pw_add1 = + create_pointwise_module(p, "main:pointwise0", {x, y}, single_pointwise("add")); + auto add1 = + mm->add_instruction(make_precompile_op("pointwise"), {x, y, alloc_ins}, {pw_add1}); + auto alloc_ins2 = mm->add_instruction(alloc); + auto layernorm_ins = + mm->add_instruction(make_precompile_op("gpu::prelayernorm"), add1, alloc_ins2); + std::vector pw_inputs = {layernorm_ins, z}; + if(not first_arg_layernorm) + { + pw_inputs = {z, layernorm_ins}; + } + auto* pw_add2 = + create_pointwise_module(p, "main:pointwise1", pw_inputs, single_pointwise("add")); + auto alloc_ins3 = mm->add_instruction(alloc); + pw_inputs.push_back(alloc_ins3); + auto add2 = mm->add_instruction(make_precompile_op("pointwise"), pw_inputs, {pw_add2}); + mm->add_return({add2}); + return p; + }; + + auto create_fused_program = [=]() { + migraphx::program p; + auto* mm = p.get_main_module(); + auto x = mm->add_parameter("x", s); + auto y = mm->add_parameter("y", s); + auto z = mm->add_parameter("z", s); + auto alloc = migraphx::make_op("allocate", {{"shape", to_value(s)}}); + auto alloc_ins = mm->add_instruction(alloc); + auto* pw_add1 = + create_pointwise_module(p, "main:pointwise0", {x, y}, single_pointwise("add")); + auto add1 = + mm->add_instruction(make_precompile_op("pointwise"), {x, y, alloc_ins}, {pw_add1}); + auto alloc_ins2 = mm->add_instruction(alloc); + auto* pw_add2 = + create_pointwise_module(p, "main:pointwise1", {x, z}, single_pointwise("add")); + auto layernorm_ins = mm->add_instruction( + make_precompile_op("gpu::prelayernorm"), {add1, z, alloc_ins2}, {pw_add2}); + mm->add_return({layernorm_ins}); + return p; + }; + + { + migraphx::program p1 = create_program(true); + run_pass(p1); + migraphx::program p2 = create_fused_program(); + EXPECT(p1 == p2); + } + { + migraphx::program p1 = create_program(false); + run_pass(p1); + migraphx::program p2 = create_fused_program(); + EXPECT(p1 == p2); + } +} + +int main(int argc, const char* argv[]) { test::run(argc, argv); } diff --git a/test/include/pointwise.hpp b/test/include/pointwise.hpp index 03892d60acd..bd5316939c0 100644 --- a/test/include/pointwise.hpp +++ b/test/include/pointwise.hpp @@ -24,16 +24,16 @@ #ifndef MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP #define MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP +#include #include #include #include template -migraphx::instruction_ref add_pointwise(migraphx::program& p, - migraphx::module_ref mm, - const std::string& name, - std::vector inputs, - F f) +migraphx::module_ref create_pointwise_module(migraphx::program& p, + const std::string& name, + std::vector inputs, + F f) { auto* pm = p.create_module(name); pm->set_bypass(); @@ -44,6 +44,17 @@ migraphx::instruction_ref add_pointwise(migraphx::program& p, }); auto r = f(pm, params); pm->add_return({r}); + return pm; +} + +template +migraphx::instruction_ref 
add_pointwise(migraphx::program& p, + migraphx::module_ref mm, + const std::string& name, + std::vector inputs, + F f) +{ + auto* pm = create_pointwise_module(p, name, inputs, f); return mm->add_instruction(migraphx::make_op("pointwise"), inputs, {pm}); } diff --git a/test/op_shape_test.cpp b/test/op_shape_test.cpp index 0d649690665..ae0abd027e5 100644 --- a/test/op_shape_test.cpp +++ b/test/op_shape_test.cpp @@ -2524,13 +2524,21 @@ TEST_CASE(reshape_shape) migraphx::shape output{migraphx::shape::float_type, lens}; expect_shape(output, migraphx::make_op("reshape", {{"dims", new_shape}}), input); } +} +TEST_CASE(reshape_shape_invalid) +{ + migraphx::shape input{migraphx::shape::float_type, {24, 1, 1, 1}}; for(auto&& new_shape : std::vector>{{8, 3, 2, 2}, {1, 3, -1, -1}, {3, 0}, {3, 2}}) { throws_shape(migraphx::make_op("reshape", {{"dims", new_shape}}), input); } +} +TEST_CASE(reshape_shape_minus1_reshapes) +{ + migraphx::shape input{migraphx::shape::float_type, {24, 1, 1, 1}}; std::vector, migraphx::shape>> minus1_tests{ {{2, -1, 3}, {migraphx::shape::float_type, {2, 4, 3}}}, {{0, -1, 0}, {migraphx::shape::float_type, {24, 1, 1}}}, @@ -2654,11 +2662,11 @@ TEST_CASE(reshape_broadcast_squeeze) expect_shape(output, migraphx::make_op("reshape", {{"dims", output.lens()}}), input); } -TEST_CASE(reshape_broadcast_squeeze_error) +TEST_CASE(reshape_broadcast_squeeze_memlayout_change) { migraphx::shape input{migraphx::shape::float_type, {2, 16, 16, 1280}, {0, 0, 0, 1}}; - std::vector new_shape = {2, 16, 20480}; - throws_shape(migraphx::make_op("reshape", {{"dims", new_shape}}), input); + migraphx::shape output{migraphx::shape::float_type, {2, 16, 256, 80}, {0, 0, 0, 16}}; + expect_shape(output, migraphx::make_op("reshape", {{"dims", output.lens()}}), input); } TEST_CASE(reshape_dyn_shape) @@ -2706,6 +2714,199 @@ TEST_CASE(reshape_non_fixed_not_matching_error) throws_shape(migraphx::make_op("reshape", {{"dims", new_shape}}), input); } +TEST_CASE(reshape_lazy_shape) +{ + migraphx::shape input{migraphx::shape::float_type, {24, 1, 1, 1}}; + for(auto&& new_shape : + std::vector>{{8, 3, 1, 1}, {1, 3, 4, 2}, {1, 3, 4, 2}}) + { + std::vector lens(new_shape.size()); + std::copy(new_shape.begin(), new_shape.end(), lens.begin()); + migraphx::shape output{migraphx::shape::float_type, lens}; + expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), input); + } + + for(auto&& new_shape : + std::vector>{{8, 3, 2, 2}, {1, 3, -1, -1}, {3, 0}, {3, 2}}) + { + throws_shape(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), input); + } + + std::vector, migraphx::shape>> minus1_tests{ + {{2, -1, 3}, {migraphx::shape::float_type, {2, 4, 3}}}, + {{0, -1, 0}, {migraphx::shape::float_type, {24, 1, 1}}}, + {{2, -1, 0}, {migraphx::shape::float_type, {2, 12, 1}}}, + {{0, 0, -1}, {migraphx::shape::float_type, {24, 1, 1}}}, + {{2, 0, -1}, {migraphx::shape::float_type, {2, 1, 12}}}, + {{-1, 2, 3}, {migraphx::shape::float_type, {4, 2, 3}}}, + {{-1, 0, 3}, {migraphx::shape::float_type, {8, 1, 3}}}, + {{-1, 0, 0}, {migraphx::shape::float_type, {24, 1, 1}}}, + {{-1, 3, 0}, {migraphx::shape::float_type, {8, 3, 1}}}}; + + for(auto& it : minus1_tests) + { + expect_shape(it.second, migraphx::make_op("reshape_lazy", {{"dims", it.first}}), input); + } +} + +// This uses the permutation to compute the reshape_lazy since its simpler than +// trying to calculate strides. As we collapse or expand dimensions, we +// remove the collapsed dimensions or duplicate the expanded dimensions in +// the permutation. 
Then we renumber the permutation. So for dimensions of 4, +// 24, 1, 1, 1 with a permutation of 1, 0, 2, 3, 4 that reshape_lazys to 4, 1, 3, +// 4, 2, we first remove the collapsed dimensions or duplicate the expanded +// dimensions which gives 1, 0, 0, 0, 0. Then after renumbering we get a +// final permutation of 4, 0, 1, 2, 3. +TEST_CASE(reshape_lazy_nonstandard) +{ + auto input = migraphx::shape::from_permutation(migraphx::shape::float_type, + {4, 24, 1, 1, 1}, + migraphx::invert_permutation({1, 0, 2, 3, 4})); + std::vector, std::vector>> tests{ + {{4, 24}, {1, 0}}, + {{4, 24, 1, 1, 1, 1}, {1, 0, 2, 3, 4, 5}}, + {{4, 8, 3, 1, 1}, {2, 0, 1, 3, 4}}, + {{4, 1, 3, 4, 2}, {4, 0, 1, 2, 3}}, + {{4, 1, 4, 3, 2}, {4, 0, 1, 2, 3}}, + {{4, 2, 4, 3}, {3, 0, 1, 2}}, + {{4, 2, 12, 1}, {2, 0, 1, 3}}, + {{4, 2, 1, 12}, {3, 0, 1, 2}}, + {{4, 4, 2, 3}, {3, 0, 1, 2}}, + {{4, 8, 1, 3}, {3, 0, 1, 2}}, + {{4, 8, 3, 1}, {2, 0, 1, 3}}}; + + for(const auto& [dims, perm] : tests) + { + migraphx::shape output = migraphx::shape::from_permutation( + migraphx::shape::float_type, dims, migraphx::invert_permutation(perm)); + expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", dims}}), input); + } +} + +TEST_CASE(reshape_lazy_nonstandard_squeeze) +{ + auto input = migraphx::shape::from_permutation( + migraphx::shape::float_type, {2, 16, 16, 1280}, migraphx::invert_permutation({0, 2, 3, 1})); + std::vector lens = {2, 256, 1280}; + migraphx::shape output = migraphx::shape::from_permutation( + migraphx::shape::float_type, lens, migraphx::invert_permutation({0, 2, 1})); + expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", lens}}), input); +} + +TEST_CASE(reshape_lazy_nonstandard_error) +{ + auto input = migraphx::shape::from_permutation(migraphx::shape::float_type, + {4, 24, 1, 1, 1}, + migraphx::invert_permutation({1, 0, 2, 3, 4})); + for(auto&& new_shape : std::vector>{{4, 8, 3, 2, 2}, + {1}, + {4, 8, 4}, + {4, 24, 1, 1, 1, 1, 2}, + {8, 4, 4}, + {4, 1, 3, -1, -1}, + {4, 3, 0}, + {4, 3, 2}, + {3, 0}, + {3, 2}}) + { + throws_shape(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), input); + } +} + +TEST_CASE(reshape_lazy_nonpacked_unsqueeze1) +{ + migraphx::shape input{migraphx::shape::float_type, {4, 16}, {32, 2}}; + migraphx::shape output{migraphx::shape::float_type, {4, 2, 8}, {32, 16, 2}}; + expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", output.lens()}}), input); +} + +TEST_CASE(reshape_lazy_nonpacked_unsqueeze2) +{ + migraphx::shape input{migraphx::shape::float_type, {4, 16}, {32, 2}}; + migraphx::shape output{migraphx::shape::float_type, {2, 2, 16}, {64, 32, 2}}; + expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", output.lens()}}), input); +} + +TEST_CASE(reshape_lazy_nonpacked_squeeze) +{ + migraphx::shape input{migraphx::shape::float_type, {4, 16}, {32, 2}}; + migraphx::shape output{migraphx::shape::float_type, {64}, {2}}; + expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", output.lens()}}), input); +} + +TEST_CASE(reshape_lazy_broadcast_unsqueeze1) +{ + migraphx::shape input{migraphx::shape::float_type, {2, 256, 1280}, {0, 0, 1}}; + migraphx::shape output{migraphx::shape::float_type, {2, 16, 16, 1280}, {0, 0, 0, 1}}; + expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", output.lens()}}), input); +} + +TEST_CASE(reshape_lazy_broadcast_unsqueeze2) +{ + migraphx::shape input{migraphx::shape::float_type, {2, 256, 1280}, {0, 0, 1}}; + migraphx::shape output{migraphx::shape::float_type, {2, 256, 16, 80}, {0, 0, 80, 1}}; + 
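+ // The packed axis of 1280 elements is split into 16 x 80 with strides
+ // {80, 1}, while the broadcast axes keep stride 0, so the alias succeeds
+ // without any data movement.
+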
expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", output.lens()}}), input); +} + +TEST_CASE(reshape_lazy_broadcast_squeeze) +{ + migraphx::shape input{migraphx::shape::float_type, {2, 16, 16, 1280}, {0, 0, 0, 1}}; + migraphx::shape output{migraphx::shape::float_type, {2, 256, 1280}, {0, 0, 1}}; + expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", output.lens()}}), input); +} + +TEST_CASE(reshape_lazy_broadcast_squeeze_error) +{ + migraphx::shape input{migraphx::shape::float_type, {2, 16, 16, 1280}, {0, 0, 0, 1}}; + std::vector new_shape = {2, 16, 20480}; + throws_shape(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), input); +} + +TEST_CASE(reshape_lazy_dyn_shape) +{ + migraphx::shape input{migraphx::shape::float_type, {{1, 4}, {24, 24}, {1, 1}, {1, 1}}}; + for(auto&& new_shape : std::vector>{ + {-1, 1, 1, 24}, {0, 8, 3, 1}, {-1, 3, 4, 2}, {0, 2, 4, 3}}) + { + std::vector out_dyn_dims{}; + for(std::size_t i = 0; i < new_shape.size(); ++i) + { + if(new_shape[i] == 0 or new_shape[i] == -1) + { + out_dyn_dims.push_back(input.dyn_dims().at(i)); + } + else + { + std::size_t d = new_shape[i]; + out_dyn_dims.push_back({d, d}); + } + } + migraphx::shape output{migraphx::shape::float_type, out_dyn_dims}; + expect_shape(output, migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), input); + } +} + +TEST_CASE(reshape_lazy_multiple_non_fixed_error) +{ + migraphx::shape input{migraphx::shape::float_type, {{1, 4}, {24, 24}, {10, 20}, {1, 1}}}; + std::vector new_shape = {0, 1, 0, 24}; + throws_shape(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), input); +} + +TEST_CASE(reshape_lazy_fixed_ele_not_matching_error) +{ + migraphx::shape input{migraphx::shape::float_type, {{1, 4}, {24, 24}, {10, 10}, {1, 1}}}; + std::vector new_shape = {0, 1, 5, 24}; + throws_shape(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), input); +} + +TEST_CASE(reshape_lazy_non_fixed_not_matching_error) +{ + migraphx::shape input{migraphx::shape::float_type, {{1, 4}, {24, 24}, {1, 1}, {1, 1}}}; + std::vector new_shape = {2, 1, 1, 24}; + throws_shape(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), input); +} + TEST_CASE(return_shape_tuple) { using migraphx::shape; diff --git a/test/ref/reshape.cpp b/test/ref/reshape.cpp index f18a0063f09..440fb8cfe96 100644 --- a/test/ref/reshape.cpp +++ b/test/ref/reshape.cpp @@ -30,6 +30,78 @@ #include +TEST_CASE(reshape_lazy_test0) +{ + migraphx::shape a_shape{migraphx::shape::float_type, {24, 1, 1, 1}}; + std::vector data(24); + std::iota(data.begin(), data.end(), -3); + migraphx::program p; + auto* mm = p.get_main_module(); + auto l = mm->add_literal(migraphx::literal{a_shape, data}); + std::vector new_shape = {8, 3, 1, 1}; + mm->add_instruction(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), l); + p.compile(migraphx::make_target("ref")); + auto result = p.eval({}).back(); + std::vector results_vector{}; + result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); + EXPECT(migraphx::verify::verify_range(results_vector, data)); +} + +TEST_CASE(reshape_lazy_test1) +{ + migraphx::shape a_shape{migraphx::shape::float_type, {24, 1, 1, 1}}; + std::vector data(24); + std::iota(data.begin(), data.end(), -3); + migraphx::program p; + auto* mm = p.get_main_module(); + auto l = mm->add_literal(migraphx::literal{a_shape, data}); + std::vector new_shape = {1, 3, 4, 2}; + mm->add_instruction(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), l); + p.compile(migraphx::make_target("ref")); + auto 
result = p.eval({}).back(); + std::vector results_vector{}; + result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); + EXPECT(migraphx::verify::verify_range(results_vector, data)); +} + +TEST_CASE(reshape_lazy_test2) +{ + migraphx::shape a_shape{migraphx::shape::float_type, {24, 1, 1, 1}}; + std::vector data(24); + std::iota(data.begin(), data.end(), -3); + migraphx::program p; + auto* mm = p.get_main_module(); + auto l = mm->add_literal(migraphx::literal{a_shape, data}); + std::vector new_shape = {1, 2, 3, 4}; + mm->add_instruction(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), l); + p.compile(migraphx::make_target("ref")); + auto result = p.eval({}).back(); + std::vector results_vector{}; + result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); + EXPECT(migraphx::verify::verify_range(results_vector, data)); +} + +TEST_CASE(reshape_lazy_dyn_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + migraphx::shape s{migraphx::shape::float_type, {{1, 4}, {24, 24}, {1, 1}, {1, 1}}}; + std::vector new_shape = {0, 8, 3, 1}; + auto input = mm->add_parameter("X", s); + mm->add_instruction(migraphx::make_op("reshape_lazy", {{"dims", new_shape}}), input); + p.compile(migraphx::make_target("ref")); + + std::vector data(48); + std::iota(data.begin(), data.end(), -3); + migraphx::parameter_map params; + migraphx::shape input_fixed_shape{migraphx::shape::float_type, {2, 24, 1, 1}}; + params["X"] = migraphx::argument(input_fixed_shape, data.data()); + auto result = p.eval(params).back(); + std::vector results_vector{}; + result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); + EXPECT(migraphx::verify::verify_range(results_vector, data)); +} + TEST_CASE(reshape_test0) { migraphx::shape a_shape{migraphx::shape::float_type, {24, 1, 1, 1}}; diff --git a/test/rewrite_pooling_test.cpp b/test/rewrite_pooling_test.cpp index f6d85d9679b..802b26e9f98 100644 --- a/test/rewrite_pooling_test.cpp +++ b/test/rewrite_pooling_test.cpp @@ -62,11 +62,8 @@ TEST_CASE(rewrite_pooling_test) auto opt_program = [&](const migraphx::operation& reduce_op) { migraphx::module m; auto input = m.add_parameter("x", s); - auto rsp = m.add_instruction(migraphx::make_op("reshape", {{"dims", {4, -1}}}), input); - auto rdm = m.add_instruction(reduce_op, rsp); - auto ret = - m.add_instruction(migraphx::make_op("reshape", {{"dims", {2, 2, 1, 1, 1}}}), rdm); - m.add_return({ret}); + auto rdm = m.add_instruction(reduce_op, input); + m.add_return({rdm}); return m; }; @@ -78,8 +75,9 @@ TEST_CASE(rewrite_pooling_test) }; test_rewrite(migraphx::op::pooling_mode::average, - migraphx::make_op("reduce_mean", {{"axes", {1}}})); - test_rewrite(migraphx::op::pooling_mode::max, migraphx::make_op("reduce_max", {{"axes", {1}}})); + migraphx::make_op("reduce_mean", {{"axes", {2, 3, 4}}})); + test_rewrite(migraphx::op::pooling_mode::max, + migraphx::make_op("reduce_max", {{"axes", {2, 3, 4}}})); } TEST_CASE(rewrite_avepooling_na1_test) diff --git a/test/verify/test_reduce_add.cpp b/test/verify/test_reduce_add.cpp new file mode 100644 index 00000000000..e7c1b56b6c1 --- /dev/null +++ b/test/verify/test_reduce_add.cpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "verify_program.hpp" +#include +#include +#include +#include + +struct test_reduce_add : verify_program +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + migraphx::shape s{migraphx::shape::float_type, {4, 1000, 2, 2}}; + migraphx::shape bs{migraphx::shape::half_type, {1, 32, 128}}; + auto x = mm->add_parameter("x", s); + auto reduce_mean = + mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", {2, 3}}}), x); + auto reduce_max = + mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {2, 3}}}), x); + auto add = mm->add_instruction(migraphx::make_op("add"), reduce_mean, reduce_max); + mm->add_return({add}); + return p; + }; +}; diff --git a/test/verify/test_reduce_noop_add.cpp b/test/verify/test_reduce_noop_add.cpp new file mode 100644 index 00000000000..090c2a737d7 --- /dev/null +++ b/test/verify/test_reduce_noop_add.cpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "verify_program.hpp" +#include +#include +#include +#include + +struct test_reduce_noop_add : verify_program +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + migraphx::shape s{migraphx::shape::float_type, {4, 1000, 1, 1}}; + migraphx::shape bs{migraphx::shape::half_type, {1, 32, 128}}; + auto x = mm->add_parameter("x", s); + auto reduce_mean = + mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", {2, 3}}}), x); + auto reduce_max = + mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {2, 3}}}), x); + auto add = mm->add_instruction(migraphx::make_op("add"), reduce_mean, reduce_max); + mm->add_return({add}); + return p; + }; +};