diff --git a/modules/nvidia_plugin/include/nvidia/nvidia_config.hpp b/modules/nvidia_plugin/include/nvidia/nvidia_config.hpp deleted file mode 100644 index ee501dfd6..000000000 --- a/modules/nvidia_plugin/include/nvidia/nvidia_config.hpp +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/** - * @brief A header that defines advanced related properties for DLIA plugins. - * These properties should be used in SetConfig() and LoadNetwork() methods of plugins - * - * @file nvidia_config.hpp - */ - -#pragma once - -#include - -#include "ie_plugin_config.hpp" - -namespace InferenceEngine { - -namespace CUDAMetrics { - -/** - * @def NVIDIA_METRIC_VALUE(name) - * @brief Shortcut for defining Template metric values - */ -#define NVIDIA_METRIC_VALUE(name) InferenceEngine::CUDAMetrics::name -#define DECLARE_NVIDIA_METRIC_VALUE(name) static constexpr auto name = #name - -// ! [public_header:metrics] -/** - * @brief Defines whether current Template device instance supports hardware blocks for fast convolution computations. - */ -DECLARE_NVIDIA_METRIC_VALUE(HARDWARE_CONVOLUTION); -// ! [public_header:metrics] - -} // namespace CUDAMetrics - -namespace CUDAConfigParams { - -/** - * @def NVIDIA_CONFIG_KEY(name) - * @brief Shortcut for defining Template device configuration keys - */ -#define NVIDIA_CONFIG_KEY(name) InferenceEngine::CUDAConfigParams::_CONFIG_KEY(NVIDIA_##name) -#define NVIDIA_CONFIG_VALUE(name) InferenceEngine::CUDAConfigParams::NVIDIA_##name - -#define DECLARE_NVIDIA_CONFIG_KEY(name) DECLARE_CONFIG_KEY(NVIDIA_##name) -#define DECLARE_NVIDIA_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(NVIDIA_##name) - -DECLARE_NVIDIA_CONFIG_VALUE(YES); -DECLARE_NVIDIA_CONFIG_VALUE(NO); - -/** - * @brief Defines the number of throutput streams used by NVIDIA GPU plugin. 
- */ -DECLARE_NVIDIA_CONFIG_VALUE(THROUGHPUT_AUTO); -DECLARE_NVIDIA_CONFIG_KEY(THROUGHPUT_STREAMS); - -/** - * @brief Defines if optimization should be run for CUDA libraries ("NVIDIA_YES", "NVIDIA_NO" - default). - */ -DECLARE_NVIDIA_CONFIG_KEY(OPERATION_BENCHMARK); - -} // namespace CUDAConfigParams -} // namespace InferenceEngine diff --git a/modules/nvidia_plugin/src/cancellation_token.hpp b/modules/nvidia_plugin/src/cancellation_token.hpp index 89abad86f..911d0b6ae 100644 --- a/modules/nvidia_plugin/src/cancellation_token.hpp +++ b/modules/nvidia_plugin/src/cancellation_token.hpp @@ -4,8 +4,6 @@ #pragma once -#include - #include #include #include diff --git a/modules/nvidia_plugin/src/cuda/blas.hpp b/modules/nvidia_plugin/src/cuda/blas.hpp index c35826393..6bda4eb9d 100644 --- a/modules/nvidia_plugin/src/cuda/blas.hpp +++ b/modules/nvidia_plugin/src/cuda/blas.hpp @@ -5,7 +5,6 @@ #pragma once #include -#include #include "runtime.hpp" diff --git a/modules/nvidia_plugin/src/cuda_compiled_model.cpp b/modules/nvidia_plugin/src/cuda_compiled_model.cpp index 9bbf59063..aa69f3251 100644 --- a/modules/nvidia_plugin/src/cuda_compiled_model.cpp +++ b/modules/nvidia_plugin/src/cuda_compiled_model.cpp @@ -19,7 +19,6 @@ #include "memory_manager/cuda_immutable_memory_block_builder.hpp" #include "memory_manager/cuda_memory_manager.hpp" #include "memory_manager/model/cuda_memory_model_builder.hpp" -#include "nvidia/nvidia_config.hpp" #include "nvidia/properties.hpp" #include "openvino/runtime/exec_model_info.hpp" #include "openvino/runtime/internal_properties.hpp" diff --git a/modules/nvidia_plugin/src/cuda_compiled_model.hpp b/modules/nvidia_plugin/src/cuda_compiled_model.hpp index 804704d5b..46cdd47a3 100644 --- a/modules/nvidia_plugin/src/cuda_compiled_model.hpp +++ b/modules/nvidia_plugin/src/cuda_compiled_model.hpp @@ -32,7 +32,7 @@ class CompiledModel : public ov::ICompiledModel { const Configuration& cfg, const std::shared_ptr& wait_executor, const std::shared_ptr& 
plugin, - bool loaded_from_cache = false); + bool loaded_from_cache); ~CompiledModel(); diff --git a/modules/nvidia_plugin/src/cuda_config.cpp b/modules/nvidia_plugin/src/cuda_config.cpp index 83fc29d99..552ee2f88 100644 --- a/modules/nvidia_plugin/src/cuda_config.cpp +++ b/modules/nvidia_plugin/src/cuda_config.cpp @@ -3,10 +3,10 @@ // #include "cuda_config.hpp" +#include "openvino/runtime/internal_properties.hpp" #include -#include #include #include @@ -154,19 +154,6 @@ Configuration::Configuration(const ov::AnyMap& config, const Configuration& defa if (ov::num_streams == key) { num_streams = value.as(); - } if (NVIDIA_CONFIG_KEY(THROUGHPUT_STREAMS) == key) { - if (value != NVIDIA_CONFIG_VALUE(THROUGHPUT_AUTO)) { - try { - num_streams = value.as(); - } catch (...) { - throw_ov_exception( - fmt::format("NVIDIA_CONFIG_KEY(THROUGHPUT_STREAMS) = {} " - "is not a number !!", - value.as())); - } - } else { - num_streams = ov::streams::AUTO; - } } else if (ov::device::id == key) { // Device id is updated already continue; @@ -215,11 +202,6 @@ ov::Any Configuration::get(const std::string& name) const { } else if (name == ov::num_streams) { return (num_streams == 0) ? ov::streams::Num(get_optimal_number_of_streams()) : num_streams; - } else if (name == NVIDIA_CONFIG_KEY(THROUGHPUT_STREAMS)) { - auto value = (num_streams == 0) ? - ov::streams::Num(get_optimal_number_of_streams()) : num_streams; - return (value == ov::streams::AUTO) ? 
NVIDIA_CONFIG_VALUE(THROUGHPUT_AUTO) - : ov::util::to_string(value); } else if (name == ov::hint::num_requests) { return hint_num_requests; } else if (name == ov::hint::inference_precision) { diff --git a/modules/nvidia_plugin/src/cuda_config.hpp b/modules/nvidia_plugin/src/cuda_config.hpp index 94cc18cdd..fafa84436 100644 --- a/modules/nvidia_plugin/src/cuda_config.hpp +++ b/modules/nvidia_plugin/src/cuda_config.hpp @@ -8,7 +8,6 @@ #include #include -#include "nvidia/nvidia_config.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/threading/istreams_executor.hpp" diff --git a/modules/nvidia_plugin/src/cuda_infer_request.cpp b/modules/nvidia_plugin/src/cuda_infer_request.cpp index 598501758..d0c118c85 100644 --- a/modules/nvidia_plugin/src/cuda_infer_request.cpp +++ b/modules/nvidia_plugin/src/cuda_infer_request.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include #include diff --git a/modules/nvidia_plugin/src/cuda_operation_base.cpp b/modules/nvidia_plugin/src/cuda_operation_base.cpp index 717c55d44..2eebaa296 100644 --- a/modules/nvidia_plugin/src/cuda_operation_base.cpp +++ b/modules/nvidia_plugin/src/cuda_operation_base.cpp @@ -5,6 +5,8 @@ #include #include "cuda_operation_base.hpp" +#include "openvino/core/node.hpp" + namespace ov { namespace nvidia_gpu { @@ -14,8 +16,8 @@ OperationBase::OperationBase(const CreationContext& /*context*/, IndexCollection&& outputIds) : node_name_{node.get_friendly_name()}, type_name_{node.get_type_info().name}, - input_ids_{move(inputIds)}, - output_ids_{move(outputIds)} { + input_ids_{inputIds}, + output_ids_{outputIds} { if (node.get_input_size() > 0) { runtime_precision_ = node.get_input_element_type(0); } else if (node.get_output_size() > 0) { diff --git a/modules/nvidia_plugin/src/cuda_operation_base.hpp b/modules/nvidia_plugin/src/cuda_operation_base.hpp index d95273175..09118458f 100644 --- a/modules/nvidia_plugin/src/cuda_operation_base.hpp +++ 
b/modules/nvidia_plugin/src/cuda_operation_base.hpp @@ -4,8 +4,6 @@ #pragma once -#include - #include #include #include diff --git a/modules/nvidia_plugin/src/cuda_operation_registry.hpp b/modules/nvidia_plugin/src/cuda_operation_registry.hpp index 89e5518e3..8af656eae 100644 --- a/modules/nvidia_plugin/src/cuda_operation_registry.hpp +++ b/modules/nvidia_plugin/src/cuda_operation_registry.hpp @@ -10,6 +10,7 @@ #include #include "cuda_operation_base.hpp" +#include "openvino/core/node.hpp" namespace ov { namespace nvidia_gpu { diff --git a/modules/nvidia_plugin/src/cuda_plugin.cpp b/modules/nvidia_plugin/src/cuda_plugin.cpp index cda4e2ec1..828ff0f91 100644 --- a/modules/nvidia_plugin/src/cuda_plugin.cpp +++ b/modules/nvidia_plugin/src/cuda_plugin.cpp @@ -3,18 +3,15 @@ // #include -#include "ie_metric_helpers.hpp" - -#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "cuda/props.hpp" #include "cuda_compiled_model.hpp" #include "cuda_infer_request.hpp" #include "cuda_itt.hpp" #include "cuda_operation_registry.hpp" #include "cuda_plugin.hpp" -#include "nvidia/nvidia_config.hpp" #include "openvino/core/op_extension.hpp" #include "openvino/op/util/op_types.hpp" +#include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/core.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/threading/executor_manager.hpp" @@ -74,7 +71,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto compiled_model = std::make_shared(model->clone(), full_config, wait_executor, - shared_from_this()); + shared_from_this(), + false); return compiled_model; } @@ -105,13 +103,22 @@ std::shared_ptr Plugin::import_model(std::istream& model_str auto model = get_core()->read_model(xml_string, weights); - auto full_config = get_full_config(properties); + // Check the ov::loaded_from_cache property, then erase it because it is no longer needed.
+ auto _properties = properties; + const auto& it = _properties.find(ov::loaded_from_cache.name()); + bool loaded_from_cache = false; + if (it != _properties.end()) { + loaded_from_cache = it->second.as(); + _properties.erase(it); + } + + auto full_config = get_full_config(_properties); auto wait_executor = get_stream_executor(full_config); auto compiled_model= std::make_shared(model, full_config, wait_executor, shared_from_this(), - true); + loaded_from_cache); return compiled_model; } @@ -193,8 +200,6 @@ void Plugin::set_property(const ov::AnyMap& properties) { } ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& properties) const { - using namespace InferenceEngine::CUDAMetrics; - auto full_config = get_full_config(properties); if (ov::supported_properties == name) { @@ -233,7 +238,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& properti ov::device::capability::EXPORT_IMPORT, ov::device::capability::FP32, ov::device::capability::FP16}}; - } else if (ov::range_for_streams == name) { + } else if (ov::range_for_streams == name) { return decltype(ov::range_for_streams)::value_type{1, Configuration::reasonable_limit_of_streams}; } else if (ov::range_for_async_infer_requests == name) { return decltype(ov::range_for_async_infer_requests)::value_type{1, 1, 1}; diff --git a/modules/nvidia_plugin/src/cuda_thread_pool.cpp b/modules/nvidia_plugin/src/cuda_thread_pool.cpp index d36eeb6f9..cc24887cd 100644 --- a/modules/nvidia_plugin/src/cuda_thread_pool.cpp +++ b/modules/nvidia_plugin/src/cuda_thread_pool.cpp @@ -6,8 +6,6 @@ #include -#include
- #include "cuda_latch.hpp" namespace ov { diff --git a/modules/nvidia_plugin/src/kernels/details/error.cpp b/modules/nvidia_plugin/src/kernels/details/error.cpp index a72860e0c..853b0e4b3 100644 --- a/modules/nvidia_plugin/src/kernels/details/error.cpp +++ b/modules/nvidia_plugin/src/kernels/details/error.cpp @@ -4,7 +4,6 @@ #include -#include
#include #include diff --git a/modules/nvidia_plugin/src/memory_manager/cuda_device_mem_block.cpp b/modules/nvidia_plugin/src/memory_manager/cuda_device_mem_block.cpp index 72a304be0..7c5ced6e1 100644 --- a/modules/nvidia_plugin/src/memory_manager/cuda_device_mem_block.cpp +++ b/modules/nvidia_plugin/src/memory_manager/cuda_device_mem_block.cpp @@ -7,7 +7,6 @@ #include #include -#include
#include namespace ov { diff --git a/modules/nvidia_plugin/src/ops/convolution_components/convolution_cudnn_components.cpp b/modules/nvidia_plugin/src/ops/convolution_components/convolution_cudnn_components.cpp index faf030981..5909c05bd 100644 --- a/modules/nvidia_plugin/src/ops/convolution_components/convolution_cudnn_components.cpp +++ b/modules/nvidia_plugin/src/ops/convolution_components/convolution_cudnn_components.cpp @@ -9,7 +9,6 @@ #include #include -#include #include namespace ov::nvidia_gpu::Convolution::Details { diff --git a/modules/nvidia_plugin/src/ops/convolution_cudnn.cpp b/modules/nvidia_plugin/src/ops/convolution_cudnn.cpp index 66d25ab31..cee0b9131 100644 --- a/modules/nvidia_plugin/src/ops/convolution_cudnn.cpp +++ b/modules/nvidia_plugin/src/ops/convolution_cudnn.cpp @@ -6,7 +6,6 @@ #include -#include
#include #include "cuda/constant_factory.hpp" diff --git a/modules/nvidia_plugin/src/ops/convolution_cudnn_be.cpp b/modules/nvidia_plugin/src/ops/convolution_cudnn_be.cpp index d4557a936..2d4743bd2 100644 --- a/modules/nvidia_plugin/src/ops/convolution_cudnn_be.cpp +++ b/modules/nvidia_plugin/src/ops/convolution_cudnn_be.cpp @@ -7,7 +7,6 @@ #include #include -#include
#include #include diff --git a/modules/nvidia_plugin/src/ops/details/cuda_ie_api_import_fix.hpp b/modules/nvidia_plugin/src/ops/details/cuda_ie_api_import_fix.hpp deleted file mode 100644 index a58d02275..000000000 --- a/modules/nvidia_plugin/src/ops/details/cuda_ie_api_import_fix.hpp +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (C) 2021-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#undef INFERENCE_ENGINE_DEPRECATED -#define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated(msg))) diff --git a/modules/nvidia_plugin/src/ops/details/cuda_ngraph_import_fix.hpp b/modules/nvidia_plugin/src/ops/details/cuda_ngraph_import_fix.hpp deleted file mode 100644 index f7086f6bf..000000000 --- a/modules/nvidia_plugin/src/ops/details/cuda_ngraph_import_fix.hpp +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (C) 2021-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#undef NGRAPH_DEPRECATED -#define NGRAPH_DEPRECATED(msg) __attribute__((deprecated(msg))) diff --git a/modules/nvidia_plugin/src/ops/fused_convolution_cudnn.cpp b/modules/nvidia_plugin/src/ops/fused_convolution_cudnn.cpp index 10f117d74..f103ece4e 100644 --- a/modules/nvidia_plugin/src/ops/fused_convolution_cudnn.cpp +++ b/modules/nvidia_plugin/src/ops/fused_convolution_cudnn.cpp @@ -6,7 +6,6 @@ #include -#include
#include #include diff --git a/modules/nvidia_plugin/src/ops/fused_convolution_cudnn_be.cpp b/modules/nvidia_plugin/src/ops/fused_convolution_cudnn_be.cpp index 9311177c9..c918434d9 100644 --- a/modules/nvidia_plugin/src/ops/fused_convolution_cudnn_be.cpp +++ b/modules/nvidia_plugin/src/ops/fused_convolution_cudnn_be.cpp @@ -8,7 +8,6 @@ #include #include -#include
#include #include diff --git a/modules/nvidia_plugin/src/ops/fused_convolution_cudnn_decomposed.cpp b/modules/nvidia_plugin/src/ops/fused_convolution_cudnn_decomposed.cpp index 088a41790..9318a9d93 100644 --- a/modules/nvidia_plugin/src/ops/fused_convolution_cudnn_decomposed.cpp +++ b/modules/nvidia_plugin/src/ops/fused_convolution_cudnn_decomposed.cpp @@ -6,7 +6,6 @@ #include -#include
#include #include diff --git a/modules/nvidia_plugin/src/ops/logical_not.cpp b/modules/nvidia_plugin/src/ops/logical_not.cpp index 97d121e75..8c1e7e44d 100644 --- a/modules/nvidia_plugin/src/ops/logical_not.cpp +++ b/modules/nvidia_plugin/src/ops/logical_not.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // #include "logical_not.hpp" +#include "openvino/core/except.hpp" #include diff --git a/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.hpp b/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.hpp index 92c0f37d3..495d6f684 100644 --- a/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.hpp +++ b/modules/nvidia_plugin/src/transformer/cuda_graph_transformer.hpp @@ -6,7 +6,6 @@ #include -#include "cpp/ie_cnn_network.h" #include "cuda_config.hpp" #include "openvino/core/model.hpp" diff --git a/modules/nvidia_plugin/tests/unit/compile_model.cpp b/modules/nvidia_plugin/tests/unit/compile_model.cpp index 934145a17..7d76a602f 100644 --- a/modules/nvidia_plugin/tests/unit/compile_model.cpp +++ b/modules/nvidia_plugin/tests/unit/compile_model.cpp @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -95,10 +94,6 @@ INSTANTIATE_TEST_SUITE_P(CompileModelTest, CompileModelTest::getTestCaseName); std::vector num_streams_1_properties = { - { - {CONFIG_KEY(DEVICE_ID), "0"}, - {NVIDIA_CONFIG_KEY(THROUGHPUT_STREAMS), "1"}, - }, { {ov::device::id.name(), "0"}, {ov::num_streams.name(), "1"}, @@ -138,10 +133,6 @@ INSTANTIATE_TEST_SUITE_P(CompileModelTest, CompileModelTest::getTestCaseName); std::vector num_streams_8_properties = { - { - {CONFIG_KEY(DEVICE_ID), "0"}, - {NVIDIA_CONFIG_KEY(THROUGHPUT_STREAMS), "8"}, - }, { {ov::device::id.name(), "0"}, {ov::num_streams.name(), "8"}, @@ -177,11 +168,6 @@ INSTANTIATE_TEST_SUITE_P(CompileModelTest, CompileModelTest::getTestCaseName); std::vector num_streams_8_properties_exclusive = { - { - {CONFIG_KEY(DEVICE_ID), "0"}, - {NVIDIA_CONFIG_KEY(THROUGHPUT_STREAMS), "8"}, - 
{CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES)}, - }, { {ov::device::id.name(), "0"}, {ov::num_streams.name(), "8"}, @@ -220,10 +206,6 @@ INSTANTIATE_TEST_SUITE_P(CompileModelTest, CompileModelTest::getTestCaseName); std::vector num_streams_auto_properties = { - { - {CONFIG_KEY(DEVICE_ID), "0"}, - {NVIDIA_CONFIG_KEY(THROUGHPUT_STREAMS), NVIDIA_CONFIG_VALUE(THROUGHPUT_AUTO)}, - }, { {ov::device::id.name(), "0"}, {ov::num_streams.name(), ov::util::to_string(ov::streams::AUTO)}, diff --git a/modules/nvidia_plugin/tests/unit/memory_manager/model/cuda_memory_model_test.cpp b/modules/nvidia_plugin/tests/unit/memory_manager/model/cuda_memory_model_test.cpp index b9410ef43..63f42a9ad 100644 --- a/modules/nvidia_plugin/tests/unit/memory_manager/model/cuda_memory_model_test.cpp +++ b/modules/nvidia_plugin/tests/unit/memory_manager/model/cuda_memory_model_test.cpp @@ -6,8 +6,6 @@ #include -#include
- TEST(MemoryModel, Empty) { using namespace ov::nvidia_gpu;