Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NVIDIA] Add checks for dynamism #696

Merged
merged 5 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions modules/nvidia_plugin/src/cuda_compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ void CompiledModel::compile_model(const std::shared_ptr<const ov::Model>& model)
// Apply transformations pipeline
transformer.transform(device, model_, config_);
}
if (model->is_dynamic()) {
throw_ov_exception("Dynamic models are not supported by NVIDIA plugin yet!");
}
// Generate backend specific blob mappings. For example Inference Engine uses not ov::Result nodes friendly name
// as inference request output names but the name of the layer before.
for (auto& result : model_->get_results()) {
Expand Down
3 changes: 3 additions & 0 deletions modules/nvidia_plugin/src/cuda_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ ov::SoPtr<ov::IRemoteContext> Plugin::get_default_context(

bool Plugin::is_operation_supported(const std::shared_ptr<ov::Node>& node, const Configuration& config) const {
bool is_op_supported = false;
if (node->is_dynamic()) {
return false;
}
if (OperationRegistry::getInstance().hasOperation(node)) {
const TensorID dummyTensorID{0};
const CreationContext context{CUDA::Device{config.get_device_id()}, false};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ bool change_concat_to_concat_optimized(Matcher& m) {
}

const auto& outputShape = concat->get_output_shape(0);
const int64_t axis = concat->get_axis();
int64_t axis = concat->get_axis();
if (axis < 0) {
axis += outputShape.size();
}
if (axis < 0 || axis >= outputShape.size()) {
return false;
}
Expand Down Expand Up @@ -74,7 +77,7 @@ bool change_concat_to_concat_optimized(Matcher& m) {

ConcatTransformation::ConcatTransformation() {
MATCHER_SCOPE(ConcatTransformation);
auto concat = wrap_type<ov::op::v0::Concat>();
auto concat = wrap_type<ov::op::v0::Concat>(has_static_shape());

matcher_pass_callback callback = [](Matcher& m) { return change_concat_to_concat_optimized(m); };

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
#include "concat_transformation.hpp"
#include "fuse_matmul_add.hpp"
#include "matmul_transformations.hpp"
#include "noop_broadcast_transformation.hpp"
#include "remove_duplicated_results_transformation.hpp"
#include "remove_redundant_convert_transformation.hpp"
#include "transformations/op_conversions/convert_divide.hpp"
Expand Down Expand Up @@ -132,7 +131,8 @@ void GraphTransformer::transform(const CUDA::Device& device,
pass_manager.register_pass<ov::nvidia_gpu::pass::TransposeMatMulTransformation>();
pass_manager.register_pass<ov::nvidia_gpu::pass::FullyConnectedTransformation>();
pass_manager.register_pass<ov::nvidia_gpu::pass::ConcatTransformation>();
pass_manager.register_pass<ov::nvidia_gpu::pass::NoopBroadcastTransformation>();
// Do we actually need to eliminate broadcast one more time at the end?
pass_manager.register_pass<ov::pass::NopElimination>();

pass_manager.run_passes(model);

Expand Down
4 changes: 2 additions & 2 deletions modules/nvidia_plugin/src/transformer/fuse_matmul_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ std::pair<std::shared_ptr<ov::op::v0::MatMul>, std::shared_ptr<ov::op::v0::Const

bool is_add_to_be_fused(const ov::Output<ov::Node>& output) {
auto add_node = std::dynamic_pointer_cast<ov::op::v1::Add>(output.get_node_shared_ptr());
if (!add_node) {
if (!add_node || add_node->is_dynamic()) {
return false;
}
std::shared_ptr<ov::op::v0::MatMul> matmul_node;
std::shared_ptr<ov::op::v0::Constant> constant_node;
std::tie(matmul_node, constant_node) = get_matmul_constant_nodes(add_node);
if (!matmul_node || !constant_node) {
if (!matmul_node || !constant_node || matmul_node->is_dynamic()) {
return false;
}

Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <tuple>

#include "transformer/concat_transformation.hpp"
#include "transformer/nodes/concat_optimized.hpp"

#include "common_test_utils/ngraph_test_utils.hpp"
#include "openvino/core/model.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/utils/utils.hpp"

using ov::nvidia_gpu::nodes::ConcatOptimized;
using namespace ov;
using namespace std;

namespace testing {

TEST(concat_optimized, concat_2_inputs_axis_1) {
    // A plain two-input Concat along a positive axis must be replaced by the
    // fused ConcatOptimized node by the NVIDIA ConcatTransformation pass.
    shared_ptr<ov::Model> transformed_model;
    shared_ptr<ov::Model> reference_model;
    const int64_t concat_axis = 1;

    // Build the original graph and run the transformation on it.
    {
        auto param_a = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
        auto param_b = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
        auto concat_node = make_shared<op::v0::Concat>(NodeVector{param_a, param_b}, concat_axis);
        transformed_model = make_shared<Model>(concat_node, ParameterVector{param_a, param_b});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
        manager.run_passes(transformed_model);
    }
    // Reference graph: the same inputs feeding the fused node directly.
    {
        auto param_a = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
        auto param_b = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
        auto fused_concat = make_shared<ConcatOptimized>(NodeVector{param_a, param_b}, concat_axis);
        reference_model = make_shared<Model>(fused_concat, ParameterVector{param_a, param_b});
    }

    const auto comparison = compare_functions(transformed_model, reference_model);
    ASSERT_TRUE(comparison.first) << comparison.second;
}

TEST(concat_optimized, concat_2_inputs_several_concats) {
    // When one parameter feeds two different Concat nodes, the transformation
    // must not fuse either of them: no ConcatOptimized node may appear.
    // Fix vs. original: the unused local `model_ref` was removed.
    shared_ptr<ov::Model> model;
    int64_t axis = 1;

    auto input0 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
    auto input1 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
    auto input2 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 512});
    // input1 is shared between both concats, which blocks the fusion.
    auto concat0 = make_shared<op::v0::Concat>(NodeVector{input0, input1}, axis);
    auto result0 = make_shared<op::v0::Result>(concat0);
    auto concat1 = make_shared<op::v0::Concat>(NodeVector{input1, input2}, axis);
    auto result1 = make_shared<op::v0::Result>(concat1);
    model = make_shared<Model>(ResultVector{result0, result1}, ParameterVector{input0, input1, input2});

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::InitNodeInfo>();
    pass_manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
    pass_manager.run_passes(model);

    ASSERT_EQ(count_ops_of_type<ConcatOptimized>(model), 0);
}

TEST(concat_optimized, concat_3_inputs_axis_negative) {
    // A three-input Concat with a negative axis (-2 on rank-3 shapes) must
    // still be fused into ConcatOptimized by the transformation.
    shared_ptr<ov::Model> transformed_model;
    shared_ptr<ov::Model> reference_model;
    const int64_t concat_axis = -2;

    // Original graph, transformed in place by the pass manager.
    {
        auto param_a = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
        auto param_b = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
        auto param_c = make_shared<op::v0::Parameter>(element::f32, Shape{1, 256, 512});
        auto concat_node = make_shared<op::v0::Concat>(NodeVector{param_a, param_b, param_c}, concat_axis);
        transformed_model = make_shared<Model>(concat_node, ParameterVector{param_a, param_b, param_c});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
        manager.run_passes(transformed_model);
    }
    // Reference graph built directly with the fused node.
    {
        auto param_a = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
        auto param_b = make_shared<op::v0::Parameter>(element::f32, Shape{1, 255, 512});
        auto param_c = make_shared<op::v0::Parameter>(element::f32, Shape{1, 256, 512});
        auto fused_concat = make_shared<ConcatOptimized>(NodeVector{param_a, param_b, param_c}, concat_axis);
        reference_model = make_shared<Model>(fused_concat, ParameterVector{param_a, param_b, param_c});
    }

    const auto comparison = compare_functions(transformed_model, reference_model);
    ASSERT_TRUE(comparison.first) << comparison.second;
}

TEST(concat_optimized, concat_with_constant_fail) {
    // A Concat whose input is a Constant (not a Parameter) must be left
    // untouched: the pass may not produce a ConcatOptimized node.
    const int64_t concat_axis = 1;

    auto param = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1, 512});
    auto constant = op::v0::Constant::create(element::f32, Shape{1, 255, 512}, {1});
    auto concat_node = make_shared<op::v0::Concat>(NodeVector{param, constant}, concat_axis);
    shared_ptr<ov::Model> model = make_shared<Model>(concat_node, ParameterVector{param});

    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
    manager.run_passes(model);

    ASSERT_EQ(count_ops_of_type<ConcatOptimized>(model), 0);
}

TEST(concat_optimized, concat_dynamic_fail) {
    // A Concat over dynamically-shaped inputs (batch dimension = -1) must not
    // be fused: the pass requires static shapes, so no ConcatOptimized node
    // may appear in the transformed model.
    const int64_t concat_axis = 1;

    auto param_a = make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, 1, 512});
    auto param_b = make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, 255, 512});
    auto concat_node = make_shared<op::v0::Concat>(NodeVector{param_a, param_b}, concat_axis);
    shared_ptr<ov::Model> model = make_shared<Model>(concat_node, ParameterVector{param_a, param_b});

    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<nvidia_gpu::pass::ConcatTransformation>();
    manager.run_passes(model);

    ASSERT_EQ(count_ops_of_type<ConcatOptimized>(model), 0);
}

} // namespace testing
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,24 @@ TEST(fuse_matmul_add, parameter_variadic_split_matmul_add_constant) {
auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

TEST(fuse_matmul_add, parameters_matmul_dynamic) {
    // A MatMul with a dynamic batch dimension followed by an Add must not be
    // fused by FullyConnectedTransformation: the MatMul node has to survive.
    auto data = make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, 512});
    auto weights = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 512});
    auto matmul = make_shared<op::v0::MatMul>(data, weights, false, true);
    auto bias = op::v0::Constant::create(element::f32, Shape{1, 1024}, {1});
    auto add = make_shared<op::v1::Add>(matmul, bias);
    shared_ptr<ov::Model> model = make_shared<Model>(add, ParameterVector{data, weights});

    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
    manager.run_passes(model);

    // Exactly one MatMul remains: the fusion was (correctly) skipped.
    ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 1);
}

} // namespace testing
Loading