diff --git a/src/common/transformations/src/transformations/mlir/convert.cpp b/src/common/transformations/src/transformations/mlir/convert.cpp
index b978ee314b3667..a755b033cb6264 100644
--- a/src/common/transformations/src/transformations/mlir/convert.cpp
+++ b/src/common/transformations/src/transformations/mlir/convert.cpp
@@ -7,16 +7,18 @@
 #include <...>
 #include <...>
 #include <...>
-#include <...>
-#include <...>
 #include <...>
-
+#include <...>
+#include <...>
+#include <...>
 #include <...>
 #include <...>
 #include <...>
 #include <...>
 
 // TODO: Prune unused headers -- it's hard to understand needed ones
+#include "conversion_context.hpp"
+#include "convert_common.hpp"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/InitLLVM.h"
@@ -55,20 +57,16 @@
 #include "mlir/Target/LLVMIR/Dialect/All.h"
 #include "mlir/Target/LLVMIR/Export.h"
 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
+#include "mlir_op.hpp"
+#include "op/matmul.hpp"
+#include "op/relu.hpp"
 #include "openvino/core/dimension.hpp"
 #include "openvino/core/rt_info.hpp"
-#include "openvino/pass/pattern/op/wrap_type.hpp"
-#include "transformations_visibility.hpp"
 #include "openvino/core/symbol.hpp"
-
-#include "transformations/symbolic_transformations/symbolic_optimizations.hpp"
-
-#include "mlir_op.hpp"
-#include "convert_common.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "subgraph_tracker.hpp"
-#include "conversion_context.hpp"
-#include "op/matmul.hpp"
-
+#include "transformations/symbolic_transformations/symbolic_optimizations.hpp"
+#include "transformations_visibility.hpp"
 
 
 namespace {
@@ -269,47 +267,12 @@ class Partitioner : public ov::pass::ModelPass {
     }
 };
 
-bool elementwise_f32_binary_no_broadcast_predicate(const ov::Output<ov::Node>& output) {
-    if(output.get_element_type() != ov::element::f32) {
-        return false;
-    }
-    // Check if implicit broadcast is possible, reject in this case
-    // Relies on symbolic information -- register SymbolicPropagation before applying this pattern
-    auto input_shape_a = output.get_node_shared_ptr()->get_input_partial_shape(0);
-    auto input_shape_b = output.get_node_shared_ptr()->get_input_partial_shape(1);
-    auto output_shape = output.get_partial_shape();
-    if(output_shape.rank().is_dynamic() || input_shape_a.rank().is_dynamic() || input_shape_b.rank().is_dynamic()) {
-        return false;
-    }
-    if(output_shape.rank().get_length() != input_shape_a.rank().get_length() || output_shape.rank().get_length() != input_shape_b.rank().get_length()) {
-        return false;
-    }
-
-    for(size_t i = 0; i < output_shape.size(); ++i) {
-        if(output_shape[i] != input_shape_a[i] || output_shape[i] != input_shape_b[i]) {
-            return false;
-        }
-        // Continue if all shapes are static.
-        if (output_shape[i].is_static() && input_shape_a[i].is_static() &&
-            input_shape_b[i].is_static())
-            continue;
-        if(!ov::symbol::are_equal(output_shape[i].get_symbol(), input_shape_a[i].get_symbol()) || !ov::symbol::are_equal(output_shape[i].get_symbol(), input_shape_b[i].get_symbol())) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-
 template <typename Op>
 NodePtr elementwise_f32_binary_no_broadcast() {
     using namespace ov::pass::pattern;
-    return wrap_type<Op>({any_input(), any_input()}, elementwise_f32_binary_no_broadcast_predicate);
+    return wrap_type<Op>({any_input(), any_input()}, elementwise_no_broadcast_predicate<ov::element::f32>);
 }
-
 
 void injectMLIR(std::shared_ptr<ov::Model> model, MLIRContext* context) {
     ov::pass::Manager manager;
     using namespace ov::op;
@@ -319,6 +282,7 @@ void injectMLIR(std::shared_ptr<ov::Model> model, MLIRContext* context) {
     manager.register_pass<MarkPattern>(elementwise_f32_binary_no_broadcast<v1::Add>(), ConvertBinary<linalg::AddOp>());
     manager.register_pass<MarkPattern>(elementwise_f32_binary_no_broadcast<v1::Subtract>(), ConvertBinary<linalg::SubOp>());
     manager.register_pass<MarkPattern>(elementwise_f32_binary_no_broadcast<v1::Multiply>(), ConvertBinary<linalg::MulOp>());
+    manager.register_pass<ReluPattern>();
     manager.register_pass<MatMulPattern>();
    manager.register_pass<Partitioner>(context);
     manager.run_passes(model);
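
An aside on extensibility: the MarkPattern registration above is the entire per-op integration surface. Below is a minimal sketch of what adding one more unary elementwise op could look like under the same conventions. ConvertExp and ExpPattern are hypothetical names for illustration and are not part of this change; ConversionContext, importTensor, createLocation, MarkPattern and elementwise_no_broadcast_predicate come from this patch, and linalg.exp is an existing MLIR named op. Static output shapes are assumed for brevity; dynamic dimensions would be collected exactly as ConvertRelu does in op/relu.cpp below.

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

#include <openvino/op/exp.hpp>
#include "openvino/pass/pattern/op/wrap_type.hpp"

#include "../conversion_context.hpp"
#include "../convert_common.hpp"

namespace {

using namespace ov::mlir;

// Hypothetical converter: lowers ov::op::v0::Exp to a linalg.exp named op.
struct ConvertExp {
    void operator()(ConversionContext& context, NodePtr node) {
        auto loc = createLocation(context.context, node);
        auto& builder = context.builder();
        const auto input = context.getInputs(node)[0];
        auto outType = importTensor(context.context,
                                    node->get_output_partial_shape(0),
                                    node->get_output_element_type(0));
        // Destination-passing style: an empty destination suffices for named unary ops.
        auto empty = builder.create<mlir::tensor::EmptyOp>(loc, outType, mlir::ValueRange{});
        auto exp = builder.create<mlir::linalg::ExpOp>(loc, mlir::ValueRange{input}, mlir::ValueRange{empty});
        context.addOutputs(node, exp);
    }
};

}  // namespace

namespace ov {
namespace mlir {

// Hypothetical pattern; the declaration would live in op/exp.hpp, mirroring op/relu.hpp.
class ExpPattern : public MarkPattern {
public:
    OPENVINO_RTTI("ExpPattern", "0");
    ExpPattern()
        : MarkPattern(ov::pass::pattern::wrap_type<ov::op::v0::Exp>(
                          {ov::pass::pattern::any_input()},
                          elementwise_no_broadcast_predicate<ov::element::f32>),
                      ConvertExp()) {}
};

}  // namespace mlir
}  // namespace ov

// injectMLIR would then gain a single line: manager.register_pass<ExpPattern>();
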
diff --git a/src/common/transformations/src/transformations/mlir/convert_common.cpp b/src/common/transformations/src/transformations/mlir/convert_common.cpp
index f7c917af1a23cb..6bca04c759a356 100644
--- a/src/common/transformations/src/transformations/mlir/convert_common.cpp
+++ b/src/common/transformations/src/transformations/mlir/convert_common.cpp
@@ -128,5 +128,42 @@ Location createLocation(MLIRContext* ctx, NodePtr node) {
     return createLayerLocation(ctx, node->get_friendly_name(), node->get_type_name());
 }
 
+bool elementwise_no_broadcast_predicate_impl(const ov::Output<ov::Node>& output, ov::element::Type type) {
+    if (output.get_element_type() != type) {
+        return false;
+    }
+    // Check if implicit broadcast is possible, reject in this case
+    // Relies on symbolic information -- register SymbolicPropagation before applying this pattern
+    auto inputs = output.get_node_shared_ptr()->inputs();
+    auto output_shape = output.get_partial_shape();
+    if (output_shape.rank().is_dynamic()) {
+        return false;
+    }
+    if (std::any_of(inputs.begin(), inputs.end(), [&](const ov::Input<ov::Node>& input) {
+            auto input_shape = input.get_partial_shape();
+            return input_shape.rank().is_dynamic() ||
+                   output_shape.rank().get_length() != input_shape.rank().get_length();
+        })) {
+        return false;
+    }
+
+    if (std::any_of(inputs.begin(), inputs.end(), [&](const ov::Input<ov::Node>& input) {
+            for (size_t i = 0; i < output_shape.size(); ++i) {
+                auto input_shape = input.get_partial_shape();
+                if (output_shape[i] != input_shape[i])
+                    return true;
+                if (output_shape[i].is_static() && input_shape[i].is_static())
+                    continue;
+                if (!ov::symbol::are_equal(output_shape[i].get_symbol(), input_shape[i].get_symbol()))
+                    return true;
+            }
+            return false;
+        })) {
+        return false;
+    }
+
+    return true;
+}
+
 } // namespace mlir
 } // namespace ov
\ No newline at end of file
diff --git a/src/common/transformations/src/transformations/mlir/convert_common.hpp b/src/common/transformations/src/transformations/mlir/convert_common.hpp
index 7fd19fe875eb80..a33c99e6bedc57 100644
--- a/src/common/transformations/src/transformations/mlir/convert_common.hpp
+++ b/src/common/transformations/src/transformations/mlir/convert_common.hpp
@@ -30,6 +30,13 @@ RankedTensorType importTensor(MLIRContext* ctx,
 
 Location createLocation(MLIRContext* ctx, NodePtr node);
 
+bool elementwise_no_broadcast_predicate_impl(const ov::Output<ov::Node>& output,
+                                             ov::element::Type type);
+
+template <ov::element::Type_t type>
+bool elementwise_no_broadcast_predicate(const ov::Output<ov::Node>& output) {
+    return elementwise_no_broadcast_predicate_impl(output, type);
+}
 
 // Borrowed it from TPP-MLIR. FIXME: Do we have a better upstreamed alternative?
 template <typename T>
 mlir::arith::ConstantOp getConstant(OpBuilder &builder, const ov::element::Type& precision, T value) {
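
For reference, the symbolic machinery the new predicate leans on can be exercised in isolation: two dynamic dimensions only count as no-broadcast compatible when symbolic shape inference has proven them equal, i.e. when their symbols compare equal under ov::symbol::are_equal, the same helper called above. A minimal sketch, assuming OpenVINO's public ov::Symbol API with ov::Dimension::set_symbol/get_symbol:

#include <memory>

#include "openvino/core/dimension.hpp"
#include "openvino/core/symbol.hpp"

// Mirrors the per-dimension test in elementwise_no_broadcast_predicate_impl:
// static dimensions compare by value, dynamic ones by symbol equality.
bool provably_same(const ov::Dimension& a, const ov::Dimension& b) {
    if (a.is_static() && b.is_static())
        return a == b;
    return ov::symbol::are_equal(a.get_symbol(), b.get_symbol());
}

int main() {
    auto s = std::make_shared<ov::Symbol>();
    ov::Dimension d1 = ov::Dimension::dynamic();
    ov::Dimension d2 = ov::Dimension::dynamic();
    d1.set_symbol(s);
    d2.set_symbol(s);  // SymbolicPropagation would assign the shared symbol
    ov::Dimension d3 = ov::Dimension::dynamic();
    d3.set_symbol(std::make_shared<ov::Symbol>());  // unrelated dimension
    return provably_same(d1, d2) && !provably_same(d1, d3) ? 0 : 1;
}
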
diff --git a/src/common/transformations/src/transformations/mlir/mlir_op.cpp b/src/common/transformations/src/transformations/mlir/mlir_op.cpp
index 8e7c4e1ce7d9db..51a7a8598f7c88 100644
--- a/src/common/transformations/src/transformations/mlir/mlir_op.cpp
+++ b/src/common/transformations/src/transformations/mlir/mlir_op.cpp
@@ -80,12 +80,24 @@ void prepareMLIRKernelWithoutWrapper(mlir::OwningOpRef<mlir::ModuleOp>& module) {
     pm.addPass(bufferization::createEmptyTensorEliminationPass());
 
     pm.addPass(bufferization::createOneShotBufferizePass());
-    // TODO: Add deallocation pass/pipeline to avoid memory leaks.
+    pm.addNestedPass<func::FuncOp>(bufferization::createFinalizingBufferizePass());
 
     // Cleanup after bufferization - possibly remove redundant copies.
     pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
     pm.addNestedPass<func::FuncOp>(createCSEPass());
 
+    // Deallocation pipeline to avoid memory leaks from created temporary buffers.
+    pm.addPass(memref::createExpandReallocPass(/*emitDeallocs=*/false));
+    pm.addPass(createCanonicalizerPass());
+    bufferization::DeallocationOptions deallocOpts;
+    deallocOpts.privateFuncDynamicOwnership = false;
+    pm.addPass(bufferization::createOwnershipBasedBufferDeallocationPass(deallocOpts));
+    pm.addPass(createCanonicalizerPass());
+    pm.addPass(bufferization::createBufferDeallocationSimplificationPass());
+    pm.addPass(bufferization::createLowerDeallocationsPass());
+    pm.addPass(createCSEPass());
+    pm.addPass(createCanonicalizerPass());
+
     // Blanket-convert any remaining high-level vector ops to loops if any remain.
     pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass());
     // pm.addNestedPass<func::FuncOp>(createLinalgGeneralizeNamedOpsPass());
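
The deallocation sequence can be sanity-checked outside of OpenVINO. Below is a minimal standalone harness, assuming an MLIR build of the same vintage as this patch; the @leaky function and the exact include set are illustrative. It parses a function that allocates a buffer without freeing it, runs the ownership-based deallocation passes registered above, and prints the module, which should now free the buffer.

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Parser/Parser.h"
#include "mlir/Pass/PassManager.h"

using namespace mlir;

int main() {
    MLIRContext ctx;
    ctx.loadDialect<arith::ArithDialect, func::FuncDialect, memref::MemRefDialect>();

    // A function that allocates a buffer but never frees it.
    const char* ir = R"mlir(
        func.func @leaky(%n: index) -> f32 {
          %buf = memref.alloc(%n) : memref<?xf32>
          %c0 = arith.constant 0 : index
          %v = memref.load %buf[%c0] : memref<?xf32>
          return %v : f32
        }
    )mlir";
    OwningOpRef<ModuleOp> module = parseSourceString<ModuleOp>(ir, &ctx);

    PassManager pm(&ctx);
    bufferization::DeallocationOptions deallocOpts;
    deallocOpts.privateFuncDynamicOwnership = false;
    pm.addPass(bufferization::createOwnershipBasedBufferDeallocationPass(deallocOpts));
    pm.addPass(bufferization::createBufferDeallocationSimplificationPass());
    pm.addPass(bufferization::createLowerDeallocationsPass());
    if (failed(pm.run(*module)))
        return 1;
    module->dump();  // @leaky should now end with a memref.dealloc of %buf
    return 0;
}
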
diff --git a/src/common/transformations/src/transformations/mlir/op/relu.cpp b/src/common/transformations/src/transformations/mlir/op/relu.cpp
new file mode 100644
index 00000000000000..a25f571f61cddf
--- /dev/null
+++ b/src/common/transformations/src/transformations/mlir/op/relu.cpp
@@ -0,0 +1,59 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Linalg/Passes.h"
+
+#include <openvino/op/relu.hpp>
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+
+#include "relu.hpp"
+#include "../convert_common.hpp"
+
+namespace {
+
+using namespace ov::mlir;
+
+struct ConvertRelu {
+    void operator()(ConversionContext& context, NodePtr node) {
+        auto loc = createLocation(context.context, node);
+        auto& builder = context.builder();
+        // TODO: Support broadcasts
+        const auto input = context.getInputs(node)[0];
+        const auto ov_output_element_type = node->get_output_element_type(0);
+        const auto ov_output_shape = node->get_output_partial_shape(0);
+        auto outType = importTensor(context.context, ov_output_shape, ov_output_element_type);
+        // Named unary ops directly overwrite data in `outs` buffer, so there is no need to provide a non-empty
+        // destination at the tensor level.
+        // Use `tensor.empty` to avoid temporary buffer allocation and memcpy after bufferization.
+        llvm::SmallVector<mlir::Value> dynamicSizes;
+        for (auto [idx, dim] : llvm::enumerate(outType.getShape())) {
+            if (!mlir::ShapedType::isDynamic(dim))
+                continue;
+            auto dimSize = builder.create<tensor::DimOp>(loc, input, idx);
+            dynamicSizes.push_back(dimSize);
+        }
+        auto empty = builder.create<tensor::EmptyOp>(loc, outType, dynamicSizes);
+        auto zero = getConstant(builder, ov_output_element_type, 0);
+        auto fill = builder.create<linalg::FillOp>(loc, mlir::ValueRange{zero}, mlir::ValueRange{empty});
+        auto relu =
+            builder.create<linalg::MaxOp>(loc, mlir::ValueRange{input, fill.getResult(0)}, mlir::ValueRange{empty});
+        context.addOutputs(node, relu);
+    }
+};
+
+}  // namespace
+
+namespace ov {
+namespace mlir {
+
+using namespace ov::pass::pattern;
+using namespace ov::op;
+
+ReluPattern::ReluPattern()
+    : MarkPattern(wrap_type<v0::Relu>({any_input()}, elementwise_no_broadcast_predicate<ov::element::f32>),
+                  ConvertRelu()) {}
+
+}  // namespace mlir
+}  // namespace ov
diff --git a/src/common/transformations/src/transformations/mlir/op/relu.hpp b/src/common/transformations/src/transformations/mlir/op/relu.hpp
new file mode 100644
index 00000000000000..a51c7366d834fb
--- /dev/null
+++ b/src/common/transformations/src/transformations/mlir/op/relu.hpp
@@ -0,0 +1,23 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/Value.h"
+
+#include "../conversion_context.hpp"
+
+namespace ov {
+namespace mlir {
+
+class ReluPattern : public MarkPattern {
+public:
+    OPENVINO_RTTI("ReluPattern", "0");
+    ReluPattern();
+};
+
+} // namespace mlir
+} // namespace ov
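
Finally, the IR that ConvertRelu emits can be reproduced with a bare OpBuilder, which makes the destination-passing choice easy to inspect. A minimal sketch, assuming the same MLIR APIs as above; the @relu function and the ?x128 shape are illustrative. It builds the same tensor.empty + linalg.fill + linalg.max sequence for a tensor with one dynamic dimension:

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"

using namespace mlir;

int main() {
    MLIRContext ctx;
    ctx.loadDialect<arith::ArithDialect, func::FuncDialect, linalg::LinalgDialect, tensor::TensorDialect>();
    OpBuilder builder(&ctx);
    auto loc = builder.getUnknownLoc();
    auto module = ModuleOp::create(loc);
    builder.setInsertionPointToEnd(module.getBody());

    // tensor<?x128xf32>, i.e. one dynamic dimension, as importTensor would produce.
    auto tType = RankedTensorType::get({ShapedType::kDynamic, 128}, builder.getF32Type());
    auto func = builder.create<func::FuncOp>(loc, "relu", builder.getFunctionType({tType}, {tType}));
    Block* body = func.addEntryBlock();
    builder.setInsertionPointToStart(body);
    Value input = body->getArgument(0);

    // Mirror ConvertRelu: dynamic sizes come from the input, destination is tensor.empty.
    Value d0 = builder.create<tensor::DimOp>(loc, input, 0);
    Value empty = builder.create<tensor::EmptyOp>(loc, tType, ValueRange{d0});
    Value zero = builder.create<arith::ConstantOp>(loc, builder.getF32FloatAttr(0.0f));
    Value zeros = builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{empty})->getResult(0);
    Value relu = builder.create<linalg::MaxOp>(loc, ValueRange{input, zeros}, ValueRange{empty})->getResult(0);
    builder.create<func::ReturnOp>(loc, relu);

    module.dump();  // linalg.fill ins(%cst) outs(%empty); linalg.max ins(%arg0, %fill) outs(%empty)
    return 0;
}
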