From 35adba21c7112ef5e51ea733db97bc4353e5b8c1 Mon Sep 17 00:00:00 2001
From: Vishnudas Thaniel S
Date: Tue, 15 Oct 2024 00:40:01 +0530
Subject: [PATCH] Ovep develop lnl 1.2 (#22424)

### Description
- Support OV2024.4
- Refactor the tensor initialization check for external weights
- Support loading an OV config
- OVEP: tensor caching fix; fix accuracy issues
- Refactor the device memory implementation to make it more generic

### Motivation and Context
These changes are required to fix accuracy issues, support loading of an OV config, and support OV2024.4.

---------

Co-authored-by: Eric Crawford
Co-authored-by: saurabhkale17
Co-authored-by: Javier E. Martinez
Co-authored-by: sfatimar
Co-authored-by: ankitm3k
Co-authored-by: Preetha Veeramalai
Co-authored-by: n1harika
Co-authored-by: jatinwadhwa921 <110383850+jatinwadhwa921@users.noreply.github.com>
---
 cmake/CMakeLists.txt | 1 +
 cmake/onnxruntime_providers_openvino.cmake | 2 +-
 .../core/session/onnxruntime_c_api.h | 2 +-
 .../openvino/backends/basic_backend.cc | 82 +++++-
 .../core/providers/openvino/contexts.h | 4 +-
 .../openvino/openvino_execution_provider.cc | 3 +-
 .../openvino/openvino_execution_provider.h | 14 +-
 .../openvino/openvino_provider_factory.cc | 262 ++++++++++--------
 .../openvino_provider_factory_creator.h | 3 +-
 .../openvino/ov_versions/capability.cc | 10 +-
 .../openvino/ov_versions/capability.h | 4 +
 .../openvino/ov_versions/data_ops.cc | 34 ++-
 .../providers/openvino/ov_versions/data_ops.h | 10 +-
 .../shared_library/provider_interfaces.h | 2 +
 .../shared_library/provider_wrappedtypes.h | 4 +
 .../core/session/provider_bridge_ort.cc | 66 ++---
 .../python/onnxruntime_pybind_state.cc | 12 +-
 .../test/perftest/command_args_parser.cc | 3 +-
 onnxruntime/test/perftest/ort_test_session.cc | 127 +++++----
 onnxruntime/test/perftest/ort_test_session.h | 3 +-
 onnxruntime/test/providers/cpu/model_tests.cc | 4 +-
 onnxruntime/test/util/default_providers.cc | 6 +-
 .../test/util/include/default_providers.h | 2 +-
 23 files changed, 379 insertions(+), 281 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 2e9be26fb9920..ef208f59f63b0 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1352,6 +1352,7 @@ if (onnxruntime_USE_OPENVINO) add_definitions(-DUSE_OPENVINO=1) if(onnxruntime_NPU_NO_FALLBACK) + add_definitions(-DOPENVINO_CONFIG_NPU=1) add_definitions(-DOPENVINO_DISABLE_NPU_FALLBACK=1) endif() diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake index 2eb3611bae902..5dcee285a5b13 100644 --- a/cmake/onnxruntime_providers_openvino.cmake +++ b/cmake/onnxruntime_providers_openvino.cmake @@ -37,7 +37,7 @@ source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs}) onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc") - onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx) + onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx nlohmann_json::nlohmann_json) install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/) set_target_properties(onnxruntime_providers_openvino PROPERTIES CXX_STANDARD 20) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 9cd5d6169bb52..9e71997c1e442 100644 --- 
a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -645,7 +645,7 @@ typedef struct OrtOpenVINOProviderOptions { * Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16" */ const char* device_type; - unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled + unsigned char enable_npu_fast_compile; const char* device_id; size_t num_of_threads; ///< 0 = Use default number of threads const char* cache_dir; // path is set to empty by default diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 71a02f076c8cc..8a1844544328c 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -83,7 +83,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr subgraph_context_.subgraph_name); ie_cnn_network_ = exe_network_.Get().get_runtime_model(); } else if (global_context_.export_ep_ctx_blob && - hw_target.find("NPU") != std::string::npos) { + hw_target.find("NPU") != std::string::npos && + !global_context_.has_external_weights) { std::shared_ptr<ov::Model> ov_model; { const std::string model = model_proto->SerializeAsString(); @@ -93,7 +94,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr ov_model = global_context_.ie_core.Get().read_model(model, ov::Tensor()); } exe_network_ = OVExeNetwork(global_context_.ie_core.Get().compile_model(ov_model, hw_target, device_config)); - } else if ((!subgraph_context_.has_dynamic_input_shape) && + } else if (!global_context_.has_external_weights && + (!subgraph_context_.has_dynamic_input_shape) && ((hw_target.find("AUTO") == std::string::npos) || (global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) > 2))) { // Optimized OV compile_model API is supported with AUTO from version 2024.3 and above @@ -178,6 +180,74 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) { } #endif } + + if (!global_context_.load_config.empty()) { + const std::map<std::string, ov::AnyMap>& target_config = global_context_.load_config; + + // Parse device types like "AUTO:CPU,GPU" and extract individual devices + auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> { + std::vector<std::string> devices; + auto delimiter_pos = device_type.find(':'); + if (delimiter_pos != std::string::npos) { + std::stringstream str_stream(device_type.substr(delimiter_pos + 1)); + std::string device; + while (std::getline(str_stream, device, ',')) { + devices.emplace_back(device); + } + } else { + devices.emplace_back(device_type); + } + return devices; + }; + + // Check if a property is supported and mutable + auto is_supported_and_mutable = [&](const std::string& key, + const std::vector<ov::PropertyName>& supported_config) -> bool { + auto it = std::find_if(supported_config.begin(), supported_config.end(), [&](const ov::PropertyName& property) { + return property == key && property.is_mutable(); + }); + return it != supported_config.end(); + }; + + // Set a property if it is valid; otherwise, log a warning and skip it because it is unsupported or immutable + auto set_target_properties = [&](const std::string& device, const ov::AnyMap& config_options, + const std::vector<ov::PropertyName>& supported_properties) { + for (const auto& [key, value] : config_options) { + if (is_supported_and_mutable(key, supported_properties)) { + global_context_.ie_core.Get().set_property(device, ov::AnyMap{{key, value}}); + } else { + LOGS_DEFAULT(WARNING) << "WARNING: Property \"" << key + << "\" is either unsupported in current OpenVINO version" + << " or property is immutable for target device \"" + << device << "\". Skipping setting this property."; + } + } + }; + + // Check if the device type is AUTO, HETERO, or MULTI + if (global_context_.device_type.find("AUTO") == 0 || + global_context_.device_type.find("HETERO") == 0 || + global_context_.device_type.find("MULTI") == 0) { + // Parse individual devices (e.g., "AUTO:CPU,GPU" -> ["CPU", "GPU"]) + auto individual_devices = parse_individual_devices(global_context_.device_type); + // Set properties only for individual devices (e.g., "CPU", "GPU") + for (const std::string& device : individual_devices) { + if (target_config.count(device)) { + // Get supported properties for each individual device + auto device_properties = global_context_.ie_core.Get().get_property(device, ov::supported_properties); + // Set properties for the device + set_target_properties(device, target_config.at(device), device_properties); + } + } + } else { + if (target_config.count(global_context_.device_type)) { + auto supported_properties = global_context_.ie_core.Get().get_property(global_context_.device_type, + ov::supported_properties); + set_target_properties(global_context_.device_type, + target_config.at(global_context_.device_type), supported_properties); + } + } + } } void BasicBackend::EnableCaching(ov::AnyMap& device_config) { @@ -275,7 +345,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque input_tensor_shape[tensor_iter] = *i; tensor_iter += 1; } - auto input = graph_input_info.at(input_idx); + const auto& input = graph_input_info.at(input_idx); OVTensorPtr tensor_ptr; // avoid input copies on the CPU device if (global_context_.device_type.find("CPU") != std::string::npos) { @@ -316,7 +386,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data; try { - infer_request->SetTensor(input_name, ov_tensor_data.tensor_ptr); + infer_request->SetTensor(std::move(input_name), ov_tensor_data.tensor_ptr); } catch (const char* msg) { ORT_THROW(msg); } @@ -354,14 +424,14 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque if ((it == ort_ov_tensor_map.end()) || (it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) { ov_tensor_data_t ov_tensor_data; - auto output = graph_output_info.at(output_idx); + const auto& output = graph_output_info.at(output_idx); ov_tensor_data.ort_ptr = tensor.GetTensorRawData(); ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(), const_cast<void*>(tensor.GetTensorRawData())); ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data; try { - infer_request->SetTensor(output_name, ov_tensor_data.tensor_ptr); + infer_request->SetTensor(std::move(output_name), ov_tensor_data.tensor_ptr); } catch (const char* msg) { ORT_THROW(msg); } diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 598e985676f8d..a2f4b236213cc 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -4,6 +4,7 @@ #pragma once #include +#include <map> #include #include #include "core/providers/openvino/ov_interface.h" @@ -15,18 +16,19 @@ namespace openvino_ep { struct GlobalContext { OVCore ie_core; bool is_wholly_supported_graph = false; - bool enable_npu_fast_compile = false; bool enable_opencl_throttling = false; bool disable_dynamic_shapes = false; bool ep_context_embed_mode = true; bool export_ep_ctx_blob = false; bool enable_qdq_optimizer = false; bool disable_cpu_fallback = false; + bool has_external_weights = false; size_t num_of_threads; std::string device_type; std::string precision_str; std::string model_precision; std::string cache_dir; + std::map<std::string, ov::AnyMap> load_config; std::string model_priority = "DEFAULT"; int num_streams; std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true}; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 08144651319cf..19a634818a442 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -25,8 +25,8 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv global_context_ = std::make_unique<GlobalContext>(); global_context_->device_type = info.device_type_; global_context_->precision_str = info.precision_; - global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_; global_context_->cache_dir = info.cache_dir_; + global_context_->load_config = info.load_config_; global_context_->model_priority = info.model_priority_; global_context_->num_streams = info.num_streams_; global_context_->context = info.context_; @@ -124,6 +124,7 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, result = obj.Execute(); global_context_->is_wholly_supported_graph = obj.IsWhollySupportedGraph(); + global_context_->has_external_weights = obj.HasExternalWeights(); return result; } diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index 8b1c62c607f6e..7d9da65ea7e07 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -79,8 +79,8 @@ static std::vector<std::string> parseDevices(const std::string& device_string, struct OpenVINOExecutionProviderInfo { std::string device_type_{""}; std::string precision_{""}; - bool enable_npu_fast_compile_{false}; size_t num_of_threads_{0}; + std::map<std::string, ov::AnyMap> load_config_{}; std::string cache_dir_{""}; std::string model_priority_{""}; int num_streams_{1}; @@ -94,16 +94,18 @@ struct OpenVINOExecutionProviderInfo { OpenVINOExecutionProviderInfo() = delete; - explicit OpenVINOExecutionProviderInfo(const std::string& dev_type, const std::string& precision, - bool enable_npu_fast_compile, size_t num_of_threads, - const std::string& cache_dir, const std::string& model_priority, - int num_streams, void* context, bool enable_opencl_throttling, + explicit OpenVINOExecutionProviderInfo(std::string dev_type, const std::string& precision, + size_t num_of_threads, + const std::map<std::string, ov::AnyMap>& load_config, + const std::string& cache_dir, + const std::string& model_priority, int num_streams, + void* context, bool enable_opencl_throttling, bool disable_dynamic_shapes, bool export_ep_ctx_blob, bool enable_qdq_optimizer, bool disable_cpu_fallback, bool so_epctx_embed_mode) : precision_(std::move(precision)), - enable_npu_fast_compile_(enable_npu_fast_compile), num_of_threads_(num_of_threads), + load_config_(std::move(load_config)), cache_dir_(std::move(cache_dir)), model_priority_(std::move(model_priority)), num_streams_(num_streams), diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc 
b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 077ecc717502f..b46106db3c232 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -1,65 +1,81 @@ // Copyright (C) Intel Corporation // Licensed under the MIT License +#include +#include #include "core/providers/shared_library/provider_api.h" #include "core/providers/openvino/openvino_provider_factory.h" #include "core/providers/openvino/openvino_execution_provider.h" #include "core/providers/openvino/openvino_provider_factory_creator.h" +#include "nlohmann/json.hpp" namespace onnxruntime { struct OpenVINOProviderFactory : IExecutionProviderFactory { - OpenVINOProviderFactory(const char* device_type, const char* precision, - bool enable_npu_fast_compile, size_t num_of_threads, - const char* cache_dir, const char* model_priority, - int num_streams, void* context, + OpenVINOProviderFactory(const std::string& device_type, const std::string& precision, + size_t num_of_threads, + const std::map& load_config, const std::string& cache_dir, + const std::string& model_priority, int num_streams, void* context, bool enable_opencl_throttling, bool disable_dynamic_shapes, - bool export_ep_ctx_blob, bool enable_qdq_optimizer, - bool disable_cpu_fallback, - bool so_epctx_embed_mode) - : precision_(precision), - enable_npu_fast_compile_(enable_npu_fast_compile), + bool enable_qdq_optimizer, const ConfigOptions& config_options) + : device_type_(device_type), + precision_(precision), num_of_threads_(num_of_threads), + load_config_(load_config), + cache_dir_(cache_dir), model_priority_(model_priority), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), disable_dynamic_shapes_(disable_dynamic_shapes), - export_ep_ctx_blob_(export_ep_ctx_blob), enable_qdq_optimizer_(enable_qdq_optimizer), - disable_cpu_fallback_(disable_cpu_fallback), - so_epctx_embed_mode_(so_epctx_embed_mode) { - device_type_ = (device_type == nullptr) ? "" : device_type; - cache_dir_ = (cache_dir == nullptr) ? 
"" : cache_dir; - } + config_options_(config_options) {} - ~OpenVINOProviderFactory() override { - } + ~OpenVINOProviderFactory() override {} std::unique_ptr CreateProvider() override; private: std::string device_type_; std::string precision_; - bool enable_npu_fast_compile_; size_t num_of_threads_; + const std::map load_config_; std::string cache_dir_; std::string model_priority_; int num_streams_; void* context_; bool enable_opencl_throttling_; bool disable_dynamic_shapes_; - bool export_ep_ctx_blob_; bool enable_qdq_optimizer_; - bool disable_cpu_fallback_; - bool so_epctx_embed_mode_; + const ConfigOptions& config_options_; }; std::unique_ptr OpenVINOProviderFactory::CreateProvider() { - OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_, + bool so_disable_cpu_fallback = config_options_.GetConfigOrDefault("session.disable_cpu_ep_fallback", "0") == "1"; + bool so_export_ep_ctx_blob = config_options_.GetConfigOrDefault("ep.context_enable", "0") == "1"; + bool so_epctx_embed_mode = config_options_.GetConfigOrDefault("ep.context_embed_mode", "1") == "1"; + std::string so_cache_path = config_options_.GetConfigOrDefault("ep.context_file_path", "").c_str(); + + if (so_export_ep_ctx_blob && !so_cache_path.empty()) { + cache_dir_ = so_cache_path; + auto file_path = std::filesystem::path(cache_dir_); + // ep_context_file_path_ file extension must be .onnx + if (file_path.extension().generic_string() == ".onnx") { + // ep_context_file_path_ must be provided as a directory, create it if doesn't exist + auto parent_path = file_path.parent_path(); + if (!parent_path.empty() && !std::filesystem::is_directory(parent_path) && + !std::filesystem::create_directory(parent_path)) { + ORT_THROW("[ERROR] [OpenVINO] Failed to create directory : " + + file_path.parent_path().generic_string() + " \n"); + } + } else { + ORT_THROW("[ERROR] [OpenVINO] Invalid ep_ctx_file_path" + cache_dir_ + " \n"); + } + } + + OpenVINOExecutionProviderInfo info(device_type_, precision_, num_of_threads_, load_config_, cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_, - disable_dynamic_shapes_, export_ep_ctx_blob_, enable_qdq_optimizer_, - disable_cpu_fallback_, - so_epctx_embed_mode_); + disable_dynamic_shapes_, so_export_ep_ctx_blob, enable_qdq_optimizer_, + so_disable_cpu_fallback, so_epctx_embed_mode); return std::make_unique(info); } @@ -77,41 +93,42 @@ struct OpenVINO_Provider : Provider { void* GetInfo() override { return &g_info; } std::shared_ptr CreateExecutionProviderFactory(const void* void_params) override { - auto& provider_options_map = *reinterpret_cast(void_params); - - std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision - // with these values at runtime. - std::string precision = ""; // [precision]: Sets the inference precision for execution. - // Supported precision for devices are CPU=FP32, GPU=FP32,FP16, NPU=FP16. - // Not setting precision will execute with optimized precision for - // best inference latency. set Precision=ACCURACY for executing models - // with input precision for best accuracy. - bool enable_npu_fast_compile = false; // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to - // speeds up the model's compilation to NPU device specific format. - int num_of_threads = 0; // [num_of_threads]: Overrides the accelerator default value of number of - // threads with this value at runtime. 
- std::string cache_dir = ""; // [cache_dir]: specify the path to - // dump and load the blobs for the model caching/kernel caching (GPU) - // feature. If blob files are already present, it will be directly loaded. - const char* model_priority = "DEFAULT"; // High-level OpenVINO model priority hint - // Defines what model should be provided with more performant - // bounded resource first - int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference - // requests to be processed on a given `device_type`. Overrides the - // accelerator default value of number of streams - // with this value at runtime. - bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU - // device (Reduces CPU Utilization when using GPU) - bool export_ep_ctx_blob = false; // Whether to export the pre-compiled blob as an EPContext model. + // Extract the void_params into ProviderOptions and ConfigOptions + typedef std::pair ConfigBuffer; + const ConfigBuffer* buffer = reinterpret_cast(void_params); + auto& provider_options_map = *buffer->first; + const ConfigOptions& config_options = buffer->second; + + std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and + // precision with these values at runtime. + std::string precision = ""; // [precision]: Sets the inference precision for execution. + // Supported precision for devices are + // CPU=FP32, GPU=FP32,FP16, NPU=FP16. + // Not setting precision will execute with optimized precision for + // best inference latency. set Precision=ACCURACY for executing + // models with input precision for best accuracy. + int num_of_threads = 0; // [num_of_threads]: Overrides the accelerator default value of + // number of threads with this value at runtime. + std::map load_config; // JSON config map to load custom OV parameters. + std::string cache_dir = ""; // [cache_dir]: specify the path to + // dump and load the blobs for the model caching/kernel caching + // (GPU) feature. If blob files are already present, + // it will be directly loaded. + std::string model_priority = "DEFAULT"; // High-level OpenVINO model priority hint + // Defines what model should be provided with more performant + // bounded resource first + int num_streams = 1; // [num_streams]: Option that specifies the number of parallel + // inference requests to be processed on a given `device_type`. + // Overrides the accelerator default value of number of streams + // with this value at runtime. + bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for + // GPU device (Reduces CPU Utilization when using GPU) + + bool enable_qdq_optimizer = false; // Enables QDQ pruning for efficient inference latency with NPU void* context = nullptr; - bool enable_qdq_optimizer = false; - - bool disable_cpu_fallback = false; - - bool so_epctx_embed_mode = true; - + std::string bool_flag = ""; if (provider_options_map.find("device_type") != provider_options_map.end()) { device_type = provider_options_map.at("device_type").c_str(); @@ -185,6 +202,68 @@ struct OpenVINO_Provider : Provider { cache_dir = provider_options_map.at("cache_dir"); } + if (provider_options_map.find("load_config") != provider_options_map.end()) { + auto parse_config = [&](const std::string& config_str) -> std::map { + // If the config string is empty, return an empty map and skip processing + if (config_str.empty()) { + LOGS_DEFAULT(WARNING) << "Empty OV Config Map passed. 
Skipping load_config option parsing.\n"; + return {}; + } + + std::stringstream input_str_stream(config_str); + std::map<std::string, ov::AnyMap> target_map; + + try { + nlohmann::json json_config = nlohmann::json::parse(input_str_stream); + + if (!json_config.is_object()) { + ORT_THROW("Invalid JSON structure: Expected an object at the root."); + } + + for (auto& [key, value] : json_config.items()) { + ov::AnyMap inner_map; + + // Ensure the key is one of "CPU", "GPU", or "NPU" + if (key != "CPU" && key != "GPU" && key != "NPU") { + LOGS_DEFAULT(WARNING) << "Unsupported device key: " << key << ". Skipping entry.\n"; + continue; + } + + // Ensure that the value for each device is an object (PROPERTY -> VALUE) + if (!value.is_object()) { + ORT_THROW("Invalid JSON structure: Expected an object for device properties."); + } + + for (auto& [inner_key, inner_value] : value.items()) { + if (inner_value.is_string()) { + inner_map[inner_key] = inner_value.get<std::string>(); + } else if (inner_value.is_number_integer()) { + inner_map[inner_key] = inner_value.get<int64_t>(); + } else if (inner_value.is_number_float()) { + inner_map[inner_key] = inner_value.get<double>(); + } else if (inner_value.is_boolean()) { + inner_map[inner_key] = inner_value.get<bool>(); + } else { + LOGS_DEFAULT(WARNING) << "Unsupported JSON value type for key: " << inner_key << ". Skipping key."; + } + } + target_map[key] = inner_map; + } + } catch (const nlohmann::json::parse_error& e) { + // Handle syntax errors in JSON + ORT_THROW("JSON parsing error: " + std::string(e.what())); + } catch (const nlohmann::json::type_error& e) { + // Handle invalid type accesses + ORT_THROW("JSON type error: " + std::string(e.what())); + } catch (const std::exception& e) { + ORT_THROW("Error parsing load_config Map: " + std::string(e.what())); + } + return target_map; + }; + + load_config = parse_config(provider_options_map.at("load_config")); + } + if (provider_options_map.find("context") != provider_options_map.end()) { std::string str = provider_options_map.at("context"); uint64_t number = std::strtoull(str.c_str(), nullptr, 16); @@ -224,16 +303,6 @@ struct OpenVINO_Provider : Provider { << "Executing with num_streams=1"; } } - std::string bool_flag = ""; - if (provider_options_map.find("enable_npu_fast_compile") != provider_options_map.end()) { - bool_flag = provider_options_map.at("enable_npu_fast_compile"); - if (bool_flag == "true" || bool_flag == "True") - enable_npu_fast_compile = true; - else if (bool_flag == "false" || bool_flag == "False") - enable_npu_fast_compile = false; - bool_flag = ""; - } - if (provider_options_map.find("enable_opencl_throttling") != provider_options_map.end()) { bool_flag = provider_options_map.at("enable_opencl_throttling"); if (bool_flag == "true" || bool_flag == "True") @@ -249,6 +318,8 @@ struct OpenVINO_Provider : Provider { enable_qdq_optimizer = true; else if (bool_flag == "false" || bool_flag == "False") enable_qdq_optimizer = false; + else + ORT_THROW("[ERROR] [OpenVINO-EP] enable_qdq_optimizer should be a boolean.\n"); bool_flag = ""; } @@ -271,68 +342,21 @@ struct OpenVINO_Provider : Provider { disable_dynamic_shapes = false; } } - } - if (provider_options_map.find("so_export_ep_ctx_blob") != provider_options_map.end()) { - bool_flag = provider_options_map.at("so_export_ep_ctx_blob"); - if (bool_flag == "true" || bool_flag == "True") - export_ep_ctx_blob = true; - else if (bool_flag == "false" || bool_flag == "False") - export_ep_ctx_blob = false; - bool_flag = ""; - } - - if (provider_options_map.find("disable_cpu_fallback") != 
provider_options_map.end()) { - bool_flag = provider_options_map.at("disable_cpu_fallback"); - if (bool_flag == "true" || bool_flag == "True") - disable_cpu_fallback = true; - else if (bool_flag == "false" || bool_flag == "False") - disable_cpu_fallback = false; - bool_flag = ""; - } - if (provider_options_map.find("so_epctx_embed_mode") != provider_options_map.end()) { - bool_flag = provider_options_map.at("so_epctx_embed_mode"); - if (bool_flag == "true" || bool_flag == "True") - so_epctx_embed_mode = true; - else if (bool_flag == "false" || bool_flag == "False") - so_epctx_embed_mode = false; bool_flag = ""; } - if (provider_options_map.find("so_epctx_path") != provider_options_map.end()) { - // The path to dump epctx model is valid only when epctx is enabled. - // Overrides the cache_dir option to dump model cache files from OV. - if (export_ep_ctx_blob && - !provider_options_map.at("so_epctx_path").empty()) { - cache_dir = provider_options_map.at("so_epctx_path"); - auto file_path = std::filesystem::path(cache_dir); - // ep_context_file_path_ file extension must be .onnx - if (file_path.extension().generic_string() == ".onnx") { - // ep_context_file_path_ must be provided as a directory, create it if doesn't exist - auto parent_path = file_path.parent_path(); - if (!parent_path.empty() && !std::filesystem::is_directory(parent_path) && - !std::filesystem::create_directory(parent_path)) { - ORT_THROW("[ERROR] [OpenVINO] Failed to create directory : " + file_path.parent_path().generic_string() + " \n"); - } - } else { - ORT_THROW("[ERROR] [OpenVINO] Invalid ep_ctx_file_path" + cache_dir + " \n"); - } - } - } - - return std::make_shared(const_cast(device_type.c_str()), - const_cast(precision.c_str()), - enable_npu_fast_compile, + return std::make_shared(device_type, + precision, num_of_threads, - const_cast(cache_dir.c_str()), + load_config, + cache_dir, model_priority, num_streams, context, enable_opencl_throttling, disable_dynamic_shapes, - export_ep_ctx_blob, enable_qdq_optimizer, - disable_cpu_fallback, - so_epctx_embed_mode); + config_options); } void Initialize() override { diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h b/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h index bff70a90b6a70..0cbf051c6df26 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h @@ -14,8 +14,7 @@ namespace onnxruntime { struct SessionOptions; // defined in provider_bridge_ort.cc struct OpenVINOProviderFactoryCreator { - static std::shared_ptr Create(ProviderOptions* provider_options_map, + static std::shared_ptr Create(const ProviderOptions* provider_options_map, const SessionOptions* session_options); - static std::shared_ptr Create(const OrtOpenVINOProviderOptions* provider_options); }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 3fcaff4369c89..0d7ac64d86e68 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -35,16 +35,16 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param, device_type_ = "CPU"; if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true; } -#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 0 - data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_, 
npu_qdq_optimizer_enabled); -#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1 +#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1 data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_, npu_qdq_optimizer_enabled); #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 2 data_ops_ = new DataOps(graph_viewer_, V_2024_2, device_type_, npu_qdq_optimizer_enabled); #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3 data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled); +#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4 + data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled); #else - data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled); + data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled); #endif } @@ -59,7 +59,7 @@ std::vector> GetCapability::Execute() { // This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc. std::unordered_set ng_required_initializers; - const auto unsupported_nodes = data_ops_->GetUnsupportedNodeIndices(ng_required_initializers); + const auto unsupported_nodes = data_ops_->GetUnsupportedNodeIndices(ng_required_initializers, has_external_weights_); #ifndef NDEBUG if (openvino_ep::backend_utils::IsDebugEnabled()) { std::cout << "No of unsupported nodes " << unsupported_nodes.size() << std::endl; diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.h b/onnxruntime/core/providers/openvino/ov_versions/capability.h index 63c83158accf8..2f87c4c73d892 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.h +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.h @@ -16,6 +16,7 @@ class GetCapability { std::string device_type_; DataOps* data_ops_; bool is_wholly_supported_graph_ = false; + bool has_external_weights_ = false; public: GetCapability(const GraphViewer& graph_viewer_param, @@ -25,6 +26,9 @@ class GetCapability { bool IsWhollySupportedGraph() { return is_wholly_supported_graph_; } + bool HasExternalWeights() { + return has_external_weights_; + } }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index d9aa13ec1bba9..e8f6ae0a43734 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -281,6 +281,10 @@ void DataOps::populate_types_supported() { std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64)); supported_types_npu_.insert( std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)); + supported_types_npu_.insert( + std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); + supported_types_npu_.insert( + std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FNUZ)); supported_types_cpu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)); @@ -328,6 +332,7 @@ void DataOps::populate_op_mode_supported() { no_dimension_supported_.push_back({"Equal", V_2022_1, {"CPU"}}); no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}}); no_dimension_supported_.push_back({"Expand", V_2023_3, {"CPU"}}); + no_dimension_supported_.push_back({"Expand", V_2024_3, 
{"CPU", "GPU"}}); no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Identity", V_2023_0, {"All"}}); @@ -363,7 +368,7 @@ void DataOps::populate_op_mode_supported() { // populate unsupportedmode_t { - UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3}, + UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4}, [this](const Node* node, const InitializedTensorSet&) { // If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch) for (size_t i = 0; i < node->InputDefs().size(); i++) { @@ -378,7 +383,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"ReduceMax", obj}); } { - UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4}, [this](const Node* node, const InitializedTensorSet&) { const auto& input_arg = node->InputDefs()[1]; auto shape = input_arg->Shape(); @@ -395,7 +400,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Reshape", obj}); } { - UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4}, [this](const Node* node, const InitializedTensorSet&) { // If the operator is unsqueeze // If axes is an input, then we cannot produce a static graph. @@ -410,7 +415,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Unsqueeze", obj}); } { - UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4}, [this](const Node* node, const InitializedTensorSet&) { // check for attributes auto& upsample_attr = node->GetAttributes(); @@ -583,11 +588,21 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) { } } -bool DataOps::unsupported_op_mode(const Node* node) { +bool DataOps::unsupported_op_mode(const Node* node, bool& has_external_weights_) { bool result = false; const auto& optype = node->OpType(); const auto& initializers = graph_viewer_.GetAllInitializedTensors(); + for (const auto& tensor_pair : initializers) { + const ONNX_NAMESPACE::TensorProto* tensor_proto = tensor_pair.second; + // Check if the tensor exists and if it has an external data location + if (tensor_proto && tensor_proto->has_data_location() && + tensor_proto->data_location() == ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL) { + has_external_weights_ = true; + break; + } + } + auto iter = op_list_.equal_range(optype); for (auto it = iter.first; it != iter.second; ++it) { auto ob = it->second; @@ -637,7 +652,7 @@ bool DataOps::dimension_unsupported(const Node* node) { return true; } -bool DataOps::node_is_supported(const NodeIndex node_idx) { +bool DataOps::node_is_supported(const NodeIndex node_idx, bool& has_external_weights_) { const auto& node = graph_viewer_.GetNode(node_idx); const auto& optype = node->OpType(); @@ -745,7 +760,7 @@ bool DataOps::node_is_supported(const NodeIndex node_idx) { } // Check 3a - if (domain == kOnnxDomain && unsupported_op_mode(node)) { + if (domain == kOnnxDomain && unsupported_op_mode(node, has_external_weights_)) { if (optype == "GatherElements") { return true; } @@ -760,11 +775,12 @@ bool DataOps::node_is_supported(const NodeIndex 
node_idx) { return true; } -std::vector DataOps::GetUnsupportedNodeIndices(std::unordered_set& ng_required_initializers) { +std::vector DataOps::GetUnsupportedNodeIndices(std::unordered_set& ng_required_initializers, + bool& has_external_weights_) { std::vector unsupported_nodes_idx; for (const auto& node_idx : graph_viewer_.GetNodesInTopologicalOrder()) { - if (node_is_supported(node_idx)) { + if (node_is_supported(node_idx, has_external_weights_)) { // Collect inputs that are initializers graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg, bool is_input) { diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h index 4c064b08405c1..5cd4c8658fb77 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h @@ -30,7 +30,8 @@ enum versionNum { V_2024_0, V_2024_1, V_2024_2, - V_2024_3 + V_2024_3, + V_2024_4 }; using VersionNum = enum versionNum; @@ -70,9 +71,9 @@ class DataOps { void populate_types_supported(); bool op_is_supported(std::string name, std::vector& list); bool dimension_unsupported(const Node* node); - bool unsupported_op_mode(const Node* node); + bool unsupported_op_mode(const Node* node, bool& has_external_weights_); bool type_is_supported(const NodeArg* node_arg, bool is_initializer); - bool node_is_supported(const NodeIndex node_idx); + bool node_is_supported(const NodeIndex node_idx, bool& has_external_weights_); public: DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, @@ -85,7 +86,8 @@ class DataOps { populate_types_supported(); } - virtual std::vector GetUnsupportedNodeIndices(std::unordered_set& ng_required_initializers); + virtual std::vector GetUnsupportedNodeIndices( + std::unordered_set& ng_required_initializers, bool& has_external_weights_); virtual bool IsOpSupportedOnlyInModel(std::string name); virtual bool SpecialConditionForClusterSizeOne( std::unordered_set& ng_required_initializers, const Node* node); diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 34319287a80fd..3efc715fc3037 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -578,6 +578,8 @@ struct ProviderHost { // ConfigOptions virtual std::optional ConfigOptions__GetConfigEntry(const ConfigOptions* p, const std::string& config_key) = 0; + virtual std::string ConfigOptions__GetConfigOrDefault(const ConfigOptions* p, const std::string& config_key, + const std::string& default_value) = 0; // OrtRunOptions virtual const ConfigOptions& RunOptions__GetConfigOptions(const RunOptions* p) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index 4644f703dcb5d..b9e0951a740a2 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -485,6 +485,10 @@ struct ConfigOptions final { return g_host->ConfigOptions__GetConfigEntry(this, config_key); } + std::string GetConfigOrDefault(const std::string& config_key, const std::string& default_value) const { + return g_host->ConfigOptions__GetConfigOrDefault(this, config_key, default_value); + } + PROVIDER_DISALLOW_ALL(ConfigOptions) }; diff --git 
a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 85079ef78c8d3..2c4bffa4fb79f 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -41,6 +41,7 @@ #include "core/session/onnxruntime_c_api.h" #include "core/common/string_helper.h" +#include #ifdef ENABLE_TRAINING #ifdef ENABLE_TRAINING_TORCH_INTEROP @@ -706,6 +707,12 @@ struct ProviderHostImpl : ProviderHost { return p->GetConfigEntry(config_key); } + // ConfigOptions (wrapped) + std::string ConfigOptions__GetConfigOrDefault(const ConfigOptions* p, const std::string& config_key, + const std::string& default_value) override { + return p->GetConfigOrDefault(config_key, default_value); + } + // OrtRunOptions (wrapped) const ConfigOptions& RunOptions__GetConfigOptions(const RunOptions* p) override { return p->config_options; } @@ -1783,12 +1790,6 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O if (legacy_ov_options->device_type != nullptr) ov_options_converted_map["device_type"] = legacy_ov_options->device_type; - if (legacy_ov_options->enable_npu_fast_compile) { - ov_options_converted_map["enable_npu_fast_compile"] = "false"; - } else { - ov_options_converted_map["enable_npu_fast_compile"] = "true"; - } - if (legacy_ov_options->num_of_threads != '\0') ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads); @@ -1809,51 +1810,24 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O ov_options_converted_map["disable_dynamic_shapes"] = "true"; } + if (legacy_ov_options->enable_npu_fast_compile) { + LOGS_DEFAULT(WARNING) << "enable_npu_fast_compile option is deprecated. Skipping this option"; + } // Add new provider option below ov_options_converted_map["num_streams"] = "1"; - ov_options_converted_map["export_ep_ctx_blob"] = "false"; + ov_options_converted_map["load_config"] = ""; ov_options_converted_map["model_priority"] = "DEFAULT"; ov_options_converted_map["enable_qdq_optimizer"] = "false"; return ov_options_converted_map; } -std::shared_ptr OpenVINOProviderFactoryCreator::Create(const OrtOpenVINOProviderOptions* provider_options) { - ProviderOptions ov_options_converted_map = onnxruntime::OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(provider_options); - return s_library_openvino.Get().CreateExecutionProviderFactory(&ov_options_converted_map); -} - -void ORTSessionOptionsToOrtOpenVINOProviderOptions(ProviderOptions& ov_options, - const SessionOptions* session_options) { - bool disable_cpu_fallback = session_options->config_options.GetConfigOrDefault( - kOrtSessionOptionsDisableCPUEPFallback, "0") == "1"; - if (disable_cpu_fallback) - ov_options["disable_cpu_fallback"] = "true"; - - // values from session options will override the providerOptions Value - bool so_epctx_enable = session_options->config_options.GetConfigOrDefault( - kOrtSessionOptionEpContextEnable, "0") == "1"; - if (so_epctx_enable) - ov_options["so_export_ep_ctx_blob"] = "true"; - - std::string so_cache_path = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "").c_str(); - ov_options["so_epctx_path"] = so_cache_path; - - // Default embedMode is 1. 
Saving the compiled model contents as a Epctx node attribute - bool so_epctx_embed_mode = session_options->config_options.GetConfigOrDefault( - kOrtSessionOptionEpContextEmbedMode, "1") == "0"; - if (so_epctx_embed_mode) { - // defaults to true - ov_options["so_epctx_embed_mode"] = "false"; - } -} - -std::shared_ptr OpenVINOProviderFactoryCreator::Create(ProviderOptions* provider_options_map, - const SessionOptions* session_options) { +std::shared_ptr OpenVINOProviderFactoryCreator::Create( + const ProviderOptions* provider_options_map, const SessionOptions* session_options) { // Append session options applicable for EP to EP Provider options. - if (session_options) { - onnxruntime::ORTSessionOptionsToOrtOpenVINOProviderOptions(*provider_options_map, session_options); - } - return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options_map); + std::pair config_buffer = {provider_options_map, + session_options->config_options}; + const void* obj = reinterpret_cast(&config_buffer); + return s_library_openvino.Get().CreateExecutionProviderFactory(obj); } std::shared_ptr DnnlProviderFactoryCreator::Create(const OrtDnnlProviderOptions* dnnl_options) { @@ -2106,9 +2080,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_MIGraphX, _In API_IMPL_END } -ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const OrtOpenVINOProviderOptions* provider_options) { +ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, + _In_ const OrtOpenVINOProviderOptions* provider_options) { API_IMPL_BEGIN - auto factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(provider_options); + const onnxruntime::ProviderOptions ov_options_converted_map = onnxruntime::OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(provider_options); + auto factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(&ov_options_converted_map, &(options->value)); if (!factory) { return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_OpenVINO: Failed to load shared library"); } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 3062738eefcf2..63757a6120fa3 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1062,12 +1062,6 @@ std::unique_ptr CreateExecutionProviderInstance( } else if (option.first == "precision") { OV_provider_options_map[option.first] = option.second; continue; - } else if (option.first == "enable_npu_fast_compile") { - if (!(option.second == "True" || option.second == "true" || - option.second == "False" || option.second == "false")) { - ORT_THROW("Invalid value passed for enable_npu_fast_compile: ", option.second); - } - OV_provider_options_map[option.first] = option.second; } else if (option.first == "enable_opencl_throttling") { if (!(option.second == "True" || option.second == "true" || option.second == "False" || option.second == "false")) { @@ -1103,13 +1097,13 @@ std::unique_ptr CreateExecutionProviderInstance( } else if (option.first == "num_streams") { OV_provider_options_map[option.first] = option.second; continue; - } else if (option.first == "cache_dir") { + } else if (option.first == "load_config") { OV_provider_options_map[option.first] = option.second; continue; - } else if (option.first == "context") { + } else if (option.first == "cache_dir") { OV_provider_options_map[option.first] = 
option.second; continue; - } else if (option.first == "export_ep_ctx_blob") { + } else if (option.first == "context") { OV_provider_options_map[option.first] = option.second; continue; } else if (option.first == "enable_qdq_optimizer") { diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 42b73ec384cf5..9e1098b24f611 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -76,11 +76,10 @@ namespace perftest { "\n" "\t [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n" "\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n" - "\t [OpenVINO only] [enable_npu_fast_compile]: Optionally enabled to speeds up the model's compilation on NPU device targets.\n" "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n" "\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n" "\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" - "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" + "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n" "\n" "\t [QNN only] [backend_path]: QNN backend path. e.g '/folderpath/libQnnHtp.so', '/folderpath/libQnnCpu.so'.\n" "\t [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index eb230ac771e13..a369c36ae9c43 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -18,6 +18,10 @@ #include "providers.h" #include "TestCase.h" +#ifdef USE_OPENVINO +#include "nlohmann/json.hpp" +#endif + #ifdef USE_DML #include "core/providers/dml/dml_provider_factory.h" #include "core/providers/dml/dml_session_options_config_keys.h" @@ -39,13 +43,8 @@ std::chrono::duration OnnxRuntimeTestSession::Run() { auto& input = test_inputs_.at(id); auto start = std::chrono::high_resolution_clock::now(); - if (!use_device_mem) { - auto output_values = session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(), - output_names_raw_ptr.data(), output_names_raw_ptr.size()); - } else { - session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(), - output_names_raw_ptr.data(), outputs_.data(), output_names_raw_ptr.size()); - } + session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(), + output_names_raw_ptr.data(), outputs_.data(), output_names_raw_ptr.size()); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration duration_seconds = end - start; @@ -807,13 +806,6 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . 
\n"); } } - } else if (key == "enable_npu_fast_compile") { - if (value == "true" || value == "True" || - value == "false" || value == "False") { - ov_options[key] = value; - } else { - ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_npu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n"); - } } else if (key == "enable_opencl_throttling") { if (value == "true" || value == "True" || value == "false" || value == "False") { @@ -843,6 +835,28 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); } else { ov_options[key] = value; } + } else if (key == "load_config") { + auto load_json = [&](std::string filename) -> std::string { + std::ifstream input_filestream(filename); + if (!input_filestream.is_open()) { + ORT_THROW("Passed an invalid JSON config file path \"" + filename + "\"."); + } + nlohmann::json json_config; + try { + input_filestream >> json_config; + } catch (const OnnxRuntimeException& ex) { + ORT_THROW("Exception parsing config file \"" + filename + "\".\n" + ex.what()); + } catch (const std::exception& ex) { + throw std::runtime_error("Standard exception for config file \"" + filename + "\".\n" + ex.what()); + } catch (...) { + throw std::runtime_error("Unknown exception for config file \"" + filename + "\".\n"); + } + if (json_config.empty()) { + ORT_THROW("Empty JSON content passed \"" + filename + "\"."); + } + return json_config.dump(); + }; + ov_options[key] = load_json(value); } else if (key == "model_priority") { ov_options[key] = value; } else if (key == "cache_dir") { @@ -855,21 +869,13 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); } else { ov_options[key] = value; } - } else if (key == "export_ep_ctx_blob") { - if (value == "true" || value == "True" || - value == "false" || value == "False") { - ov_options[key] = value; - } else { - ORT_THROW( - "[ERROR] [OpenVINO] The value for the key 'export_ep_ctx_blob' " - "should be a boolean i.e. true or false. Default value is false.\n"); - } - } else if (key == "use_device_mem") { - if (value == "true" || value == "True") { - use_device_mem = true; - } + } else if (key == "device_memory_name") { + device_memory_name_ = std::move(value); } else { - ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'disable_dynamic_shapes'] \n"); + ORT_THROW( + "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO." + " ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', " + "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer', 'model_priority'] \n"); } } session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); @@ -912,25 +918,31 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. 
\n)"); input_names_[i] = input_names_str_[i].c_str(); } - if (use_device_mem) { - Ort::MemoryInfo memory_info = Ort::MemoryInfo("OpenVINO_RT_NPU", OrtArenaAllocator, 0, OrtMemTypeCPUOutput); + auto transform_fcn = std::function(); + auto new_value = std::function&, Ort::ConstTensorTypeAndShapeInfo&)>(); + if (device_memory_name_.empty()) { + transform_fcn = [](int64_t input) { return input; }; + new_value = [](OrtAllocator*, const std::vector&, Ort::ConstTensorTypeAndShapeInfo&) { + return Ort::Value(nullptr); + }; + } else { + Ort::MemoryInfo memory_info = Ort::MemoryInfo(device_memory_name_.data(), OrtArenaAllocator, 0, OrtMemTypeCPUOutput); custom_allocator_ = std::make_unique(session_, memory_info); - for (size_t i = 0; i < output_names_raw_ptr.size(); i++) { - Ort::TypeInfo type_info = session_.GetOutputTypeInfo(i); - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - - std::vector output_shape = tensor_info.GetShape(); + allocator_ = *custom_allocator_; - // free dimensions are treated as 1 if not overridden - for (int64_t& dim : output_shape) { - if (dim == -1) { - dim = 1; - } - } + // free dimensions are treated as 1 if not overridden + transform_fcn = [](int64_t input) { return (input == -1) ? -input : input; }; + new_value = [](OrtAllocator* allocator, const std::vector& output_shape, Ort::ConstTensorTypeAndShapeInfo& tensor_info) { + return Ort::Value::CreateTensor(allocator, output_shape.data(), output_shape.size(), tensor_info.GetElementType()); + }; + } - outputs_.push_back(Ort::Value::CreateTensor(*custom_allocator_, (const int64_t*)output_shape.data(), - output_shape.size(), tensor_info.GetElementType())); - } + for (size_t i = 0; i < output_names_raw_ptr.size(); i++) { + Ort::TypeInfo type_info = session_.GetOutputTypeInfo(i); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + std::vector output_shape = tensor_info.GetShape(); + std::transform(output_shape.begin(), output_shape.end(), output_shape.begin(), transform_fcn); + outputs_.emplace_back(new_value(allocator_, output_shape, tensor_info)); } } @@ -1020,29 +1032,16 @@ bool OnnxRuntimeTestSession::PopulateGeneratedInputTestData(int32_t seed) { Ort::TypeInfo type_info = session_.GetInputTypeInfo(i); if (type_info.GetONNXType() == ONNX_TYPE_TENSOR) { auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - if (!use_device_mem) { - Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - } std::vector input_node_dim = tensor_info.GetShape(); // free dimensions are treated as 1 if not overridden - for (int64_t& dim : input_node_dim) { - if (dim == -1) { - dim = 1; - } - } - if (use_device_mem) { - Ort::Value input_tensor = Ort::Value::CreateTensor(*custom_allocator_, (const int64_t*)input_node_dim.data(), - input_node_dim.size(), tensor_info.GetElementType()); - InitializeTensorWithSeed(seed, input_tensor); - PreLoadTestData(0, i, std::move(input_tensor)); - } else { - auto allocator = Ort::AllocatorWithDefaultOptions(); - Ort::Value input_tensor = Ort::Value::CreateTensor(allocator, (const int64_t*)input_node_dim.data(), - input_node_dim.size(), tensor_info.GetElementType()); - InitializeTensorWithSeed(seed, input_tensor); - PreLoadTestData(0, i, std::move(input_tensor)); - } + auto transform_fcn = [](int64_t input) { return (input == -1) ? 
-input : input; }; + std::transform(input_node_dim.begin(), input_node_dim.end(), input_node_dim.begin(), transform_fcn); + + Ort::Value input_tensor = Ort::Value::CreateTensor(allocator_, (const int64_t*)input_node_dim.data(), + input_node_dim.size(), tensor_info.GetElementType()); + InitializeTensorWithSeed(seed, input_tensor); + PreLoadTestData(0, i, std::move(input_tensor)); } } return true; diff --git a/onnxruntime/test/perftest/ort_test_session.h b/onnxruntime/test/perftest/ort_test_session.h index e33041a2a0958..7d5e46983ad41 100644 --- a/onnxruntime/test/perftest/ort_test_session.h +++ b/onnxruntime/test/perftest/ort_test_session.h @@ -38,6 +38,7 @@ class OnnxRuntimeTestSession : public TestSession { std::mt19937 rand_engine_; std::uniform_int_distribution dist_; std::vector> test_inputs_; + OrtAllocator* allocator_ = Ort::AllocatorWithDefaultOptions(); std::unique_ptr custom_allocator_; std::vector outputs_; std::vector output_names_; @@ -48,7 +49,7 @@ class OnnxRuntimeTestSession : public TestSession { std::vector input_names_str_; const int input_length_; std::string provider_name_; - bool use_device_mem = false; + std::string device_memory_name_; // Device memory type name to use from the list in allocator.h }; } // namespace perftest diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 177647ab5be6b..e3c86a137484f 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -570,7 +570,7 @@ ::std::vector<::std::basic_string> GetParameterStrings() { ORT_TSTR("yolov3"), ORT_TSTR("LSTM_Seq_lens_unpacked"), ORT_TSTR("tinyyolov3"), - ORT_TSTR("faster_rcnn"), + // ORT_TSTR("faster_rcnn"), ORT_TSTR("mask_rcnn"), ORT_TSTR("coreml_FNS-Candy_ImageNet"), ORT_TSTR("tf_mobilenet_v2_1.0_224"), @@ -581,7 +581,7 @@ ::std::vector<::std::basic_string> GetParameterStrings() { ORT_TSTR("mlperf_ssd_resnet34_1200"), ORT_TSTR("candy"), ORT_TSTR("cntk_simple_seg"), - ORT_TSTR("GPT2_LM_HEAD"), + // ORT_TSTR("GPT2_LM_HEAD"), ORT_TSTR("mlperf_ssd_mobilenet_300"), ORT_TSTR("fp16_coreml_FNS-Candy"), ORT_TSTR("fp16_test_tiny_yolov2"), diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index d07e01c1a4e27..d57a22f024d5f 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -99,11 +99,13 @@ std::unique_ptr MIGraphXExecutionProviderWithOptions(const O return nullptr; } -std::unique_ptr OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params) { +std::unique_ptr OpenVINOExecutionProviderWithOptions(const ProviderOptions* params, + const SessionOptions* session_options) { #ifdef USE_OPENVINO - return OpenVINOProviderFactoryCreator::Create(params)->CreateProvider(); + return OpenVINOProviderFactoryCreator::Create(params, session_options)->CreateProvider(); #else ORT_UNUSED_PARAMETER(params); + ORT_UNUSED_PARAMETER(session_options); return nullptr; #endif } diff --git a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h index 1fd9894e09d4e..ed95bf67f1ffb 100644 --- a/onnxruntime/test/util/include/default_providers.h +++ b/onnxruntime/test/util/include/default_providers.h @@ -49,7 +49,7 @@ std::unique_ptr TensorrtExecutionProviderWithOptions(const O std::unique_ptr TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params); std::unique_ptr DefaultMIGraphXExecutionProvider(); std::unique_ptr 
MIGraphXExecutionProviderWithOptions(const OrtMIGraphXProviderOptions* params); -std::unique_ptr OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params); +std::unique_ptr OpenVINOExecutionProviderWithOptions(const ProviderOptions* params, const SessionOptions* session_options = nullptr); std::unique_ptr DefaultOpenVINOExecutionProvider(); std::unique_ptr DefaultNnapiExecutionProvider(); std::unique_ptr DefaultVSINPUExecutionProvider();
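
### Usage notes

The new `load_config` provider option accepts a JSON string that maps a device key (`"CPU"`, `"GPU"`, or `"NPU"`) to an object of OpenVINO properties. At compile time the EP checks each property against the target device's `ov::supported_properties` and skips unsupported or immutable ones with a warning; for `AUTO`/`HETERO`/`MULTI` targets the properties are applied per underlying device. Below is a minimal sketch of passing the option through the C++ V2 provider-options API; the model path and the `PERFORMANCE_HINT` property are illustrative, not mandated by this patch:

```cpp
#include <string>
#include <unordered_map>
#include "onnxruntime_cxx_api.h"

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ovep-load-config");
  Ort::SessionOptions session_options;

  std::unordered_map<std::string, std::string> ov_options;
  ov_options["device_type"] = "GPU";
  // Device key -> {property: value}; keys other than CPU/GPU/NPU are skipped
  // with a warning, as are properties the device reports as immutable.
  ov_options["load_config"] = R"({"GPU": {"PERFORMANCE_HINT": "THROUGHPUT"}})";

  session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);  // hypothetical model path
  return 0;
}
```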
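
With `so_export_ep_ctx_blob`, `so_epctx_path`, `so_epctx_embed_mode`, and `disable_cpu_fallback` removed as provider options, the factory now reads the equivalent session-config entries (`ep.context_enable`, `ep.context_file_path`, `ep.context_embed_mode`, `session.disable_cpu_ep_fallback`) from `ConfigOptions` in `CreateProvider()`. A sketch of driving EPContext export through session options instead, reusing the includes above; the output path is illustrative and, per the validation in the factory, must end in `.onnx`:

```cpp
Ort::SessionOptions session_options;
// These keys replace the removed "so_export_ep_ctx_blob", "so_epctx_path",
// and "so_epctx_embed_mode" provider options.
session_options.AddConfigEntry("ep.context_enable", "1");
session_options.AddConfigEntry("ep.context_embed_mode", "1");
session_options.AddConfigEntry("ep.context_file_path", "epctx_out/model_ctx.onnx");
```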
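
In `onnxruntime_perf_test`, `load_config` takes a path to a JSON file that is read with `nlohmann::json`, validated, and forwarded to the EP as the option above, e.g. `-e openvino -i "device_type|GPU load_config|ov_config.json"` (file name hypothetical), where `ov_config.json` might contain:

```json
{
  "GPU": {
    "PERFORMANCE_HINT": "LATENCY"
  }
}
```

The removed `use_device_mem` flag is superseded by `device_memory_name`, which takes the device allocator name directly (the previous behavior hard-coded `OpenVINO_RT_NPU`).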