diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake
index 5dcee285a5b13..e500957f864f8 100644
--- a/cmake/onnxruntime_providers_openvino.cmake
+++ b/cmake/onnxruntime_providers_openvino.cmake
@@ -11,22 +11,22 @@
"${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
)
- if (WIN32)
- set(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release)
- endif()
-
# Header paths
find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX)
- if(OpenVINO_VERSION VERSION_LESS 2024.0)
- message(FATAL_ERROR "OpenVINO 2024.0 and newer are supported. Please, use latest OpenVINO release")
+ if(OpenVINO_VERSION VERSION_LESS 2024.3)
+ message(FATAL_ERROR "OpenVINO 2024.3 and newer are supported. Please use the latest OpenVINO release.")
endif()
if(OpenVINO_VERSION VERSION_GREATER_EQUAL 2024.4)
add_definitions(-DUSE_OVEP_NPU_MEMORY=1)
endif()
- if (WIN32)
- unset(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO)
+ # If building RelWithDebInfo and the OV package does not provide that configuration, map it to Release
+ get_target_property(ov_rt_implib_rwdi openvino::runtime IMPORTED_IMPLIB_RELWITHDEBINFO)
+ if ((CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo) AND NOT ov_rt_implib_rwdi)
+ set_target_properties(openvino::runtime PROPERTIES
+ MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release
+ )
endif()
list(APPEND OPENVINO_LIB_LIST openvino::frontend::onnx openvino::runtime ${PYTHON_LIBRARIES})
@@ -82,3 +82,8 @@
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()
+
+set_target_properties(onnxruntime_providers_openvino PROPERTIES
+ MAP_IMPORTED_CONFIG_RELEASE RelWithDebInfo
+ MAP_IMPORTED_CONFIG_DEBUG RelWithDebInfo
+ )
\ No newline at end of file
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index b0c5d2329c428..b1a79f5921328 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -626,8 +626,13 @@ typedef struct OrtMIGraphXProviderOptions {
} OrtMIGraphXProviderOptions;
/** \brief OpenVINO Provider Options
- *
- * \see OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO
+ * \brief This struct has been frozen since ORT 1.13.0. It is maintained as part of the legacy API for compatibility.
+ * \brief For the latest OpenVINO Provider Options, update to the ProviderOptions map.
+ * \brief The latest OpenVINO Provider Options are listed in the
+ * \htmlonly
+ * <a href="https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html">onnxruntime document</a>.
+ * \endhtmlonly
+ * \see OrtApi::SessionOptionsAppendExecutionProvider()
*/
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
@@ -645,7 +650,7 @@ typedef struct OrtOpenVINOProviderOptions {
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
- unsigned char enable_npu_fast_compile;
+ unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
const char* device_id;
size_t num_of_threads; ///< 0 = Use default number of threads
const char* cache_dir; // path is set to empty by default
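
Context for the deprecation note above: new OVEP options are passed as string key/value pairs through the generic provider-options path rather than this frozen struct. A minimal sketch of that usage via the C++ wrapper API (the keys shown, "device_type" and "num_of_threads", are illustrative; consult the OVEP documentation for the supported set):

```cpp
#include <onnxruntime_cxx_api.h>
#include <string>
#include <unordered_map>

int main() {
  Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "ovep-demo"};
  Ort::SessionOptions session_options;

  // Keys and values are plain strings; the provider validates them at append time.
  std::unordered_map<std::string, std::string> ov_options{
      {"device_type", "NPU"},    // e.g. "CPU", "GPU", "NPU"
      {"num_of_threads", "4"},   // also sizes the infer-request pool (see basic_backend.cc)
  };
  session_options.AppendExecutionProvider("OpenVINO", ov_options);

  Ort::Session session{env, ORT_TSTR("model.onnx"), session_options};
  return 0;
}
```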
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 8a1844544328c..56cceb8cf2a19 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -120,8 +120,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
} catch (const char* msg) {
ORT_THROW(msg);
}
-
- inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, 1));
+ int num_infer_req = (global_context_.num_of_threads > 0) ? global_context_.num_of_threads : 1;
+ inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, num_infer_req));
}
bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
@@ -663,7 +663,6 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
// Requesting for an idle infer_request from a pool of infer_requests_
OVInferRequestPtr infer_request;
infer_request = inferRequestsQueue_->getIdleRequest();
-
#ifdef IO_BUFFER_ENABLED
if ((global_context_.device_type.find("GPU") != std::string::npos) &&
(global_context_.context != nullptr) && global_context_.is_wholly_supported_graph) {
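
For reviewers unfamiliar with the pool whose size becomes configurable above: InferRequestsQueue hands an idle request to each Infer() call and blocks when all requests are busy, which is what makes num_of_threads concurrent runs possible. A simplified sketch of that shape (not the actual OVEP implementation; the type names here are hypothetical stand-ins):

```cpp
#include <condition_variable>
#include <memory>
#include <mutex>
#include <queue>

struct InferRequest {};  // stand-in for an ov::InferRequest wrapper
using InferRequestPtr = std::shared_ptr<InferRequest>;

class IdleRequestQueue {
 public:
  explicit IdleRequestQueue(size_t n) {
    for (size_t i = 0; i < n; ++i) idle_.push(std::make_shared<InferRequest>());
  }
  // Blocks until a request is free; called at the top of Infer().
  InferRequestPtr GetIdle() {
    std::unique_lock<std::mutex> lk(mu_);
    cv_.wait(lk, [this] { return !idle_.empty(); });
    InferRequestPtr r = idle_.front();
    idle_.pop();
    return r;
  }
  // Returns a request to the pool once inference completes.
  void PutIdle(InferRequestPtr r) {
    {
      std::lock_guard<std::mutex> lk(mu_);
      idle_.push(std::move(r));
    }
    cv_.notify_one();
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  std::queue<InferRequestPtr> idle_;
};
```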
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
index 6e39f5832226c..72a188108adef 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -3,6 +3,8 @@
#include <filesystem>
#include <map>
#include <string>
+#include <memory>
+#include <vector>
#include "core/providers/shared_library/provider_api.h"
#include "core/providers/openvino/openvino_execution_provider.h"
#include "core/providers/openvino/contexts.h"
@@ -187,15 +189,23 @@ common::Status OpenVINOExecutionProvider::Compile(
#ifdef USE_OVEP_NPU_MEMORY
std::vector<AllocatorPtr> OpenVINOExecutionProvider::CreatePreferredAllocators() {
- AllocatorCreationInfo npu_allocator_info{
- [this](OrtDevice::DeviceId device_id) {
- return std::make_unique<OVRTAllocator>(global_context_->ie_core.Get(), OrtDevice::NPU, device_id, OpenVINO_RT_NPU);
- },
- 0,
- };
-
- // fill in allocator
- return std::vector<AllocatorPtr>{CreateAllocator(npu_allocator_info)};
+ if (global_context_->device_type.find("NPU") != std::string::npos) {
+ AllocatorCreationInfo npu_allocator_info{
+ [this](OrtDevice::DeviceId device_id) {
+ return std::make_unique<OVRTAllocator>(
+ global_context_->ie_core.Get(),
+ OrtDevice::NPU,
+ device_id,
+ OpenVINO_RT_NPU);
+ },
+ 0,
+ };
+
+ // fill in allocator
+ return std::vector<AllocatorPtr>{CreateAllocator(npu_allocator_info)};
+ } else {
+ return std::vector<AllocatorPtr>{};
+ }
}
#endif
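
The gating condition above is worth a note: because it uses find("NPU") rather than string equality, composite device strings opt in as well, so the NPU allocator is also registered for heterogeneous configurations. A tiny illustration (the helper name is hypothetical):

```cpp
#include <string>

// Mirrors the substring test in CreatePreferredAllocators().
bool WantsNpuAllocator(const std::string& device_type) {
  return device_type.find("NPU") != std::string::npos;
}
// WantsNpuAllocator("NPU")            -> true
// WantsNpuAllocator("HETERO:NPU,CPU") -> true
// WantsNpuAllocator("CPU")            -> false
```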
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index 16f06ad9dd1da..bea9badea475a 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -199,8 +199,8 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
#endif
private:
std::unique_ptr<openvino_ep::GlobalContext> global_context_;
- openvino_ep::EPCtxHandler ep_ctx_handle_{};
std::shared_ptr<openvino_ep::BackendManager> backend_manager_;
+ openvino_ep::EPCtxHandler ep_ctx_handle_{};
};
} // namespace onnxruntime
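
The member reorder above changes teardown order, not just layout: C++ constructs non-static members in declaration order and destroys them in the exact reverse, so ep_ctx_handle_ is now constructed after, and destroyed before, backend_manager_. A minimal demonstration of the rule (Tracer is a hypothetical stand-in):

```cpp
#include <iostream>

struct Tracer {
  const char* name;
  explicit Tracer(const char* n) : name(n) { std::cout << "ctor " << name << '\n'; }
  ~Tracer() { std::cout << "dtor " << name << '\n'; }
};

struct Provider {
  Tracer global_context{"global_context_"};
  Tracer backend_manager{"backend_manager_"};
  Tracer ep_ctx_handle{"ep_ctx_handle_"};
};

int main() {
  Provider p;
  // Prints ctors in declaration order, then dtors in reverse:
  // ep_ctx_handle_ is destroyed first, global_context_ last.
}
```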
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index 0d7ac64d86e68..95c7466e02f2f 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -35,16 +35,14 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param,
device_type_ = "CPU";
if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true;
}
-#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1
- data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_, npu_qdq_optimizer_enabled);
-#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 2
- data_ops_ = new DataOps(graph_viewer_, V_2024_2, device_type_, npu_qdq_optimizer_enabled);
-#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
+#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled);
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
+#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5
+ data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
#else
- data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
+ data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
#endif
}
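
Note the shape of this dispatch: the trailing #else intentionally maps any OpenVINO release newer than the last one listed to the newest known rule set, while the CMake gate (VERSION_LESS 2024.3 -> FATAL_ERROR) guarantees older releases never reach this code. A condensed sketch of the idea (enum values abbreviated):

```cpp
enum VersionNum { V_2024_3, V_2024_4, V_2024_5 };

// Hypothetical condensed form of the dispatch above. Building against a
// future release (e.g. 2025.x) still compiles and is handled with the
// newest known rules via the #else branch.
#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
constexpr VersionNum kVersion = V_2024_3;
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
constexpr VersionNum kVersion = V_2024_4;
#else  // 2024.5 and anything newer
constexpr VersionNum kVersion = V_2024_5;
#endif
```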
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index e8f6ae0a43734..b2c5fd6f83167 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -118,6 +118,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"CumSum", V_2022_1, {"CPU", "GPU"}},
{"DepthToSpace", V_2020_4, {"CPU", "GPU"}},
{"DequantizeLinear", V_2021_4, {"CPU", "GPU"}},
+ {"DequantizeLinear", V_2024_4, {"NPU"}},
{"Div", V_2020_4, {"CPU", "GPU"}},
{"Dropout", V_2020_4, {"CPU", "GPU"}},
{"Elu", V_2020_4, {"CPU", "GPU"}},
@@ -254,6 +255,8 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_initializer_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+ supported_types_initializer_.insert(
+ std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
supported_types_initializer_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16));
supported_types_initializer_.insert(
@@ -262,6 +265,10 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
supported_types_initializer_.insert(
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+ supported_types_initializer_.insert(
+ std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
+ supported_types_initializer_.insert(
+ std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
supported_types_npu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
@@ -285,6 +292,10 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN));
supported_types_npu_.insert(
std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FNUZ));
+ supported_types_npu_.insert(
+ std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
+ supported_types_npu_.insert(
+ std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
@@ -304,6 +315,10 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_cpu_.insert(
std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+ supported_types_cpu_.insert(
+ std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
+ supported_types_cpu_.insert(
+ std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
supported_types_gpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
@@ -319,6 +334,10 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
supported_types_gpu_.insert(
std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+ supported_types_gpu_.insert(
+ std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
+ supported_types_gpu_.insert(
+ std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
}
void DataOps::populate_op_mode_supported() {
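
All four tables extended above share one lookup pattern: an element type is accepted for a device class only when the build targets at least the release recorded next to it, which is why the new INT4/UINT4 rows are tagged V_2024_4. A hypothetical mirror of that check (types and names simplified; the real tables hold ONNX TensorProto_DataType values):

```cpp
#include <set>
#include <utility>

enum VersionNum { V_2020_4, V_2021_4, V_2022_2, V_2024_4, V_2024_5 };
using TypeEntry = std::pair<VersionNum, int>;  // int stands in for TensorProto_DataType

// True if elem_type was registered at or before the targeted release.
bool TypeSupported(const std::set<TypeEntry>& table, VersionNum build_version, int elem_type) {
  for (const auto& [since, type] : table) {
    if (type == elem_type && since <= build_version) return true;
  }
  return false;
}
```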
@@ -368,7 +387,7 @@ void DataOps::populate_op_mode_supported() {
// populate unsupportedmode_t
{
- UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4},
+ UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
[this](const Node* node, const InitializedTensorSet&) {
// If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch)
for (size_t i = 0; i < node->InputDefs().size(); i++) {
@@ -383,7 +402,7 @@ void DataOps::populate_op_mode_supported() {
op_list_.insert({"ReduceMax", obj});
}
{
- UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
+ UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
[this](const Node* node, const InitializedTensorSet&) {
const auto& input_arg = node->InputDefs()[1];
auto shape = input_arg->Shape();
@@ -400,7 +419,7 @@ void DataOps::populate_op_mode_supported() {
op_list_.insert({"Reshape", obj});
}
{
- UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
+ UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
[this](const Node* node, const InitializedTensorSet&) {
// If the operator is unsqueeze
// If axes is an input, then we cannot produce a static graph.
@@ -415,7 +434,7 @@ void DataOps::populate_op_mode_supported() {
op_list_.insert({"Unsqueeze", obj});
}
{
- UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
+ UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
[this](const Node* node, const InitializedTensorSet&) {
// check for attributes
auto& upsample_attr = node->GetAttributes();
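
The version lists extended in the hunks above all feed one mechanism: each UnsupportedOpMode pairs the releases it applies to with a predicate over the node, and an op is rejected when the current release is in the list and the predicate fires, so each new OpenVINO version must be appended to every restriction that still holds. A condensed sketch (simplified; the real struct in data_ops.h also receives the InitializedTensorSet):

```cpp
#include <functional>
#include <map>
#include <string>
#include <vector>

enum VersionNum { V_2024_3, V_2024_4, V_2024_5 };
struct Node;  // opaque here; ORT's graph node in the real code

struct UnsupportedOpMode {
  std::vector<VersionNum> ver;            // releases the restriction applies to
  std::function<bool(const Node*)> func;  // returns true when the op must be rejected
};

std::multimap<std::string, UnsupportedOpMode> op_list_;

bool IsRejected(const std::string& optype, VersionNum v, const Node* node) {
  auto range = op_list_.equal_range(optype);
  for (auto it = range.first; it != range.second; ++it) {
    const UnsupportedOpMode& mode = it->second;
    bool version_listed = false;
    for (VersionNum ver : mode.ver) version_listed |= (ver == v);
    if (version_listed && mode.func(node)) return true;
  }
  return false;
}
```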
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
index 5cd4c8658fb77..a2db56deca7cd 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@@ -31,7 +31,8 @@ enum versionNum {
V_2024_1,
V_2024_2,
V_2024_3,
- V_2024_4
+ V_2024_4,
+ V_2024_5
};
using VersionNum = enum versionNum;
diff --git a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml
index d8c0120fc9ee5..9ee589a3d6ef3 100644
--- a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml
@@ -33,5 +33,5 @@ jobs:
parameters:
AgentPool : 'Linux-CPU-2019'
JobName: 'Linux_CI_Dev'
- RunDockerBuildArgs: '-o ubuntu22.04 -p 3.10 -d openvino -v 2024.3.0 -x "--use_openvino CPU --build_wheel"'
+ RunDockerBuildArgs: '-o ubuntu22.04 -p 3.10 -d openvino -v 2024.4.0 -x "--use_openvino CPU --build_wheel"'
TimeoutInMinutes: 120
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino
index 4c80e7a907630..8f3dcb69d6c56 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino
+++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino
@@ -1,7 +1,7 @@
ARG UBUNTU_VERSION=22.04
FROM ubuntu:${UBUNTU_VERSION}
-ARG OPENVINO_VERSION=2024.3.0
+ARG OPENVINO_VERSION=2024.4.0
ARG PYTHON_VERSION=3.10
ADD scripts /tmp/scripts
@@ -19,9 +19,9 @@ ENV IE_PLUGINS_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64
ENV DEBIAN_FRONTEND=noninteractive
RUN cd /opt && mkdir -p intel && cd intel && \
- wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.3/linux/l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz && \
- tar xzf l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz && rm -rf l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz && \
- mv l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64 openvino_2024.3.0 && \
+ wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && \
+ tar xzf l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && rm -rf l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && \
+ mv l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64 openvino_2024.4.0 && \
cd $INTEL_OPENVINO_DIR/install_dependencies && ./install_openvino_dependencies.sh -y
WORKDIR /root