Skip to content

Commit

Permalink
[NVIDIA] Add checks for dynamism (#696)
Browse files Browse the repository at this point in the history
* [NVIDIA] Add check for dynamism in FullyConnectedTransformation transformation

* [NVIDIA] Add check for dynamism in ConcatTransformation transformation

* [NVIDIA] Replace NoopBroadcastTransformation with NopElimination

* [NVIDIA] Add checks for dynamic operations/model

* Fix windows build
  • Loading branch information
nkogteva authored Aug 1, 2023
1 parent 9968dbd commit cee24c7
Show file tree
Hide file tree
Showing 9 changed files with 167 additions and 66 deletions.
3 changes: 3 additions & 0 deletions modules/nvidia_plugin/src/cuda_compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ void CompiledModel::compile_model(const std::shared_ptr<const ov::Model>& model)
// Apply transformations pipeline
transformer.transform(device, model_, config_);
}
if (model->is_dynamic()) {
throw_ov_exception("Dynamic models are not supported by NVIDIA plugin yet!");
}
// Generate backend specific blob mappings. For example Inference Engine uses not ov::Result nodes friendly name
// as inference request output names but the name of the layer before.
for (auto& result : model_->get_results()) {
Expand Down
3 changes: 3 additions & 0 deletions modules/nvidia_plugin/src/cuda_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ ov::SoPtr<ov::IRemoteContext> Plugin::get_default_context(

bool Plugin::is_operation_supported(const std::shared_ptr<ov::Node>& node, const Configuration& config) const {
bool is_op_supported = false;
if (node->is_dynamic()) {
return false;
}
if (OperationRegistry::getInstance().hasOperation(node)) {
const TensorID dummyTensorID{0};
const CreationContext context{CUDA::Device{config.get_device_id()}, false};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ bool change_concat_to_concat_optimized(Matcher& m) {
}

const auto& outputShape = concat->get_output_shape(0);
const int64_t axis = concat->get_axis();
int64_t axis = concat->get_axis();
if (axis < 0) {
axis += outputShape.size();
}
if (axis < 0 || axis >= outputShape.size()) {
return false;
}
Expand Down Expand Up @@ -74,7 +77,7 @@ bool change_concat_to_concat_optimized(Matcher& m) {

ConcatTransformation::ConcatTransformation() {
MATCHER_SCOPE(ConcatTransformation);
auto concat = wrap_type<ov::op::v0::Concat>();
auto concat = wrap_type<ov::op::v0::Concat>(has_static_shape());

matcher_pass_callback callback = [](Matcher& m) { return change_concat_to_concat_optimized(m); };

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
#include "concat_transformation.hpp"
#include "fuse_matmul_add.hpp"
#include "matmul_transformations.hpp"
#include "noop_broadcast_transformation.hpp"
#include "remove_duplicated_results_transformation.hpp"
#include "remove_redundant_convert_transformation.hpp"
#include "transformations/op_conversions/convert_divide.hpp"
Expand Down Expand Up @@ -132,7 +131,8 @@ void GraphTransformer::transform(const CUDA::Device& device,
pass_manager.register_pass<ov::nvidia_gpu::pass::TransposeMatMulTransformation>();
pass_manager.register_pass<ov::nvidia_gpu::pass::FullyConnectedTransformation>();
pass_manager.register_pass<ov::nvidia_gpu::pass::ConcatTransformation>();
pass_manager.register_pass<ov::nvidia_gpu::pass::NoopBroadcastTransformation>();
// Do we actually need to eliminate broadcast one more time at the end?
pass_manager.register_pass<ov::pass::NopElimination>();

pass_manager.run_passes(model);

Expand Down
4 changes: 2 additions & 2 deletions modules/nvidia_plugin/src/transformer/fuse_matmul_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ std::pair<std::shared_ptr<ov::op::v0::MatMul>, std::shared_ptr<ov::op::v0::Const

bool is_add_to_be_fused(const ov::Output<ov::Node>& output) {
auto add_node = std::dynamic_pointer_cast<ov::op::v1::Add>(output.get_node_shared_ptr());
if (!add_node) {
if (!add_node || add_node->is_dynamic()) {
return false;
}
std::shared_ptr<ov::op::v0::MatMul> matmul_node;
std::shared_ptr<ov::op::v0::Constant> constant_node;
std::tie(matmul_node, constant_node) = get_matmul_constant_nodes(add_node);
if (!matmul_node || !constant_node) {
if (!matmul_node || !constant_node || matmul_node->is_dynamic()) {
return false;
}

Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <tuple>

#include "transformer/concat_transformation.hpp"
#include "transformer/nodes/concat_optimized.hpp"

#include "common_test_utils/ngraph_test_utils.hpp"
#include "openvino/core/model.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/utils/utils.hpp"

using ov::nvidia_gpu::nodes::ConcatOptimized;
using namespace ov;
using namespace std;

namespace testing {

TEST(concat_optimized, concat_2_inputs_axis_1) {
    // A static-shape Concat over axis 1 is expected to be rewritten by
    // ConcatTransformation into the plugin-specific ConcatOptimized node.
    constexpr int64_t kAxis = 1;
    shared_ptr<ov::Model> transformed_model;
    shared_ptr<ov::Model> expected_model;
    {
        // Model that actually runs through the transformation pipeline.
        auto param_a = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
        auto param_b = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
        auto concat_node = make_shared<op::v0::Concat>(NodeVector{param_a, param_b}, kAxis);
        transformed_model = make_shared<Model>(concat_node, ParameterVector{param_a, param_b});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
        manager.run_passes(transformed_model);
    }
    {
        // Reference model: the same graph built directly with ConcatOptimized.
        auto param_a = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
        auto param_b = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
        auto fused_concat = make_shared<ConcatOptimized>(NodeVector{param_a, param_b}, kAxis);
        expected_model = make_shared<Model>(fused_concat, ParameterVector{param_a, param_b});
    }

    const auto cmp = compare_functions(transformed_model, expected_model);
    ASSERT_TRUE(cmp.first) << cmp.second;
}

TEST(concat_optimized, concat_2_inputs_several_concats) {
    // Negative case: input1 feeds two different Concat nodes. The
    // transformation must not replace either of them, so no
    // ConcatOptimized node may appear after the passes run.
    // NOTE(review): removed the unused `model_ref` local — this test only
    // counts ops, it never compares against a reference model.
    shared_ptr<ov::Model> model;
    int64_t axis = 1;

    auto input0 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
    auto input1 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
    auto input2 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 512});
    auto concat0 = make_shared<op::v0::Concat>(NodeVector{input0, input1}, axis);
    auto result0 = make_shared<op::v0::Result>(concat0);
    auto concat1 = make_shared<op::v0::Concat>(NodeVector{input1, input2}, axis);
    auto result1 = make_shared<op::v0::Result>(concat1);
    model = make_shared<Model>(ResultVector{result0, result1}, ParameterVector{input0, input1, input2});

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::InitNodeInfo>();
    pass_manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
    pass_manager.run_passes(model);

    // No fusion should have happened.
    ASSERT_EQ(count_ops_of_type<ConcatOptimized>(model), 0);
}

TEST(concat_optimized, concat_3_inputs_axis_negative) {
    // A Concat with a negative axis (-2 here, i.e. axis 1 of a rank-3 shape)
    // must still be recognized and rewritten into ConcatOptimized.
    constexpr int64_t kAxis = -2;
    shared_ptr<ov::Model> transformed_model;
    shared_ptr<ov::Model> expected_model;
    {
        // Graph under test, run through the transformation pipeline.
        auto param_a = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
        auto param_b = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
        auto param_c = make_shared<op::v0::Parameter>(element::f32, Shape{1, 256, 512});
        auto concat_node = make_shared<op::v0::Concat>(NodeVector{param_a, param_b, param_c}, kAxis);
        transformed_model = make_shared<Model>(concat_node, ParameterVector{param_a, param_b, param_c});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
        manager.run_passes(transformed_model);
    }
    {
        // Reference graph built directly with ConcatOptimized.
        auto param_a = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
        auto param_b = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
        auto param_c = make_shared<op::v0::Parameter>(element::f32, Shape{1, 256, 512});
        auto fused_concat = make_shared<ConcatOptimized>(NodeVector{param_a, param_b, param_c}, kAxis);
        expected_model = make_shared<Model>(fused_concat, ParameterVector{param_a, param_b, param_c});
    }

    const auto cmp = compare_functions(transformed_model, expected_model);
    ASSERT_TRUE(cmp.first) << cmp.second;
}

TEST(concat_optimized, concat_with_constant_fail) {
    // Negative case: one Concat input is a Constant. The transformation is
    // expected to leave such a Concat alone — no ConcatOptimized is produced.
    constexpr int64_t kAxis = 1;

    auto param = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
    auto constant_input = op::v0::Constant::create(element::f32, Shape{1, 255, 512}, {1});
    auto concat_node = make_shared<op::v0::Concat>(NodeVector{param, constant_input}, kAxis);
    shared_ptr<ov::Model> model = make_shared<Model>(concat_node, ParameterVector{param});

    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
    manager.run_passes(model);

    // The graph must not contain any fused concat node.
    ASSERT_EQ(count_ops_of_type<ConcatOptimized>(model), 0);
}

TEST(concat_optimized, concat_dynamic_fail) {
    // Negative case: inputs have a dynamic batch dimension (-1). The
    // transformation must skip dynamic Concat nodes, so no ConcatOptimized
    // node may be created.
    constexpr int64_t kAxis = 1;

    auto param_a = make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, 1, 512});
    auto param_b = make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, 255, 512});
    auto concat_node = make_shared<op::v0::Concat>(NodeVector{param_a, param_b}, kAxis);
    shared_ptr<ov::Model> model = make_shared<Model>(concat_node, ParameterVector{param_a, param_b});

    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
    manager.run_passes(model);

    // Dynamic shapes: the original Concat must be left untouched.
    ASSERT_EQ(count_ops_of_type<ConcatOptimized>(model), 0);
}

} // namespace testing
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,24 @@ TEST(fuse_matmul_add, parameter_variadic_split_matmul_add_constant) {
auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

TEST(fuse_matmul_add, parameters_matmul_dynamic) {
    // Negative case for FullyConnectedTransformation: the MatMul input has a
    // dynamic batch dimension (-1), so the MatMul+Add pair must NOT be fused
    // (this change adds an is_dynamic() guard to the fusion predicate).
    // NOTE(review): removed the redundant outer `model` declaration and the
    // pointless extra block scope — unlike the other tests, this one builds
    // only a single model and needs no reference scope.
    auto input0 = make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, 512});
    auto input1 = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 512});
    auto matmul = make_shared<op::v0::MatMul>(input0, input1, false, true);
    auto const_node = op::v0::Constant::create(element::f32, Shape{1, 1024}, {1});
    auto add = make_shared<op::v1::Add>(matmul, const_node);
    auto model = make_shared<Model>(add, ParameterVector{input0, input1});

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::InitNodeInfo>();
    pass_manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
    pass_manager.run_passes(model);

    // The MatMul must survive the pass: no FullyConnected fusion happened.
    ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 1);
}

} // namespace testing

0 comments on commit cee24c7

Please sign in to comment.