Ovep develop lnl 1.2 #22424

Merged: 38 commits, Oct 14, 2024

Commits
a554e5e
fix: caching lookup to behave correctly when inputs/output mapping ar…
ericcraw Sep 23, 2024
f70b885
fix tensor caching
saurabhkale17 Sep 24, 2024
ce64f7a
fix lint issues
saurabhkale17 Sep 24, 2024
086a048
fix lint issues
saurabhkale17 Sep 25, 2024
d85d7bb
Improvement in average inference latency for models running on OVEP N…
saurabhkale17 Sep 5, 2024
d455fe0
Ovep release lnl 1.2.1 (#445)
saurabhkale17 Sep 11, 2024
87fd31a
Refactor device memory implementation to make it more generic
javier-intel Sep 13, 2024
48f0291
fix lint issues
saurabhkale17 Sep 20, 2024
03906a3
Modified Create Options to pass config options to execution Provider
sfatimar Sep 23, 2024
05802f4
Changes for adding config buffer
sfatimar Sep 23, 2024
feaa0fb
fix psa psr accuracy issue
saurabhkale17 Sep 23, 2024
bbebca8
feat: Load custom json OV config during runtime
ankitm3k Sep 14, 2024
ab9de33
fix: review comment fixes
ankitm3k Sep 25, 2024
89b5006
fix: fix lint issues
ankitm3k Sep 25, 2024
f9b995c
Revert "fix psa psr accuracy issue"
saurabhkale17 Sep 25, 2024
b8bbc1c
Merge pull request #455 from intel/saurabh/fix_regression
sfatimar Sep 25, 2024
717a9c3
update: Enable Python API for load_config in provider options
ankitm3k Sep 26, 2024
bc8b5cd
Merge branch 'microsoft:main' into ovep-develop-lnl-1.2
preetha-intel Sep 26, 2024
39f5d4c
Merge pull request #456 from intel/ankit/ov_config_loader_v2
sfatimar Sep 26, 2024
9999cd3
update: handling few edge cases for parsing
ankitm3k Sep 30, 2024
91cb834
Merge pull request #459 from intel/ankit/ov_config_loader_v2
ankitm3k Sep 30, 2024
e95451b
added input checks for enable_qdq_optimiser (#460)
n1harika Oct 1, 2024
b766127
Merge branch 'microsoft:main' into ovep-develop-lnl-1.2
sfatimar Oct 1, 2024
4db9fb0
FP8 support on NPU (#462)
n1harika Oct 1, 2024
710d309
fix: updated Expand Op properties (#463)
ankitm3k Oct 1, 2024
f3fddd4
removing fasterRCNN and GPT2_LM_HEAD from openvino_disabled_tests[] (…
n1harika Oct 1, 2024
0dd8768
Refactor tensor initialization check for external weights and fixed l…
jatinwadhwa921 Oct 1, 2024
90aa047
Remove npu fast compile (#465)
preetha-intel Oct 2, 2024
fc3b92e
fix: Securing load_config path parsing
ankitm3k Oct 2, 2024
dd49e37
Device memory refactor fix (#466)
javier-intel Oct 3, 2024
783b147
Merge pull request #467 from intel/ankit/ov_config_loader_v4
sfatimar Oct 4, 2024
d7ac0ac
Fixed coverity issues (#471)
jatinwadhwa921 Oct 11, 2024
a22166f
update: Using load_config as map (#470)
ankitm3k Oct 13, 2024
6089543
Upgrade OpenVINO Toolkit v2024.4 (#472)
jatinwadhwa921 Oct 13, 2024
3d6604d
Merge branch 'microsoft:main' into ovep-develop-lnl-1.2
vthaniel Oct 13, 2024
463328e
Commit Unused parameter for session_options
sfatimar Oct 14, 2024
4e308e7
Merge branch 'ovep-develop-lnl-1.2' of https://github.com/intel/onnxr…
sfatimar Oct 14, 2024
a8350bb
Retain the deprecated provider option with a warning
preetha-intel Oct 14, 2024
Diff view
1 change: 1 addition & 0 deletions cmake/CMakeLists.txt
@@ -1352,6 +1352,7 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DUSE_OPENVINO=1)

if(onnxruntime_NPU_NO_FALLBACK)
add_definitions(-DOPENVINO_CONFIG_NPU=1)
add_definitions(-DOPENVINO_DISABLE_NPU_FALLBACK=1)
endif()

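The new OPENVINO_CONFIG_NPU definition accompanies the existing OPENVINO_DISABLE_NPU_FALLBACK flag so NPU-only builds can be detected in code. A minimal sketch of how such compile definitions are typically consumed (illustrative only, not code from this PR):

// Hypothetical consumer of the build flags defined above; not taken from this PR.
// When the build is configured with onnxruntime_NPU_NO_FALLBACK, both macros are
// defined and CPU-fallback code paths can be compiled out.
#if defined(OPENVINO_CONFIG_NPU) && defined(OPENVINO_DISABLE_NPU_FALLBACK)
constexpr bool kAllowCpuFallback = false;
#else
constexpr bool kAllowCpuFallback = true;
#endif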
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers_openvino.cmake
@@ -37,7 +37,7 @@

source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx)
onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx nlohmann_json::nlohmann_json)
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
set_target_properties(onnxruntime_providers_openvino PROPERTIES CXX_STANDARD 20)
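nlohmann_json is linked into the provider here to back the new load_config feature, which reads a user-supplied JSON document of per-device OpenVINO properties. A minimal sketch, assuming string-valued properties and a layout of the form { "DEVICE": { "PROPERTY": "value" } }, of how such a document could be converted into the std::map<std::string, ov::AnyMap> the provider stores; the provider's actual parser may differ:

// Sketch only: turning a load_config JSON document into the per-device
// property map used by the provider. Assumes string-valued properties.
#include <map>
#include <string>
#include <utility>

#include <nlohmann/json.hpp>
#include <openvino/openvino.hpp>

std::map<std::string, ov::AnyMap> ParseLoadConfig(const std::string& json_text) {
  std::map<std::string, ov::AnyMap> result;
  const nlohmann::json root = nlohmann::json::parse(json_text);
  for (const auto& [device, properties] : root.items()) {
    ov::AnyMap device_properties;
    for (const auto& [key, value] : properties.items()) {
      device_properties[key] = value.get<std::string>();
    }
    result[device] = std::move(device_properties);
  }
  return result;
}

With this shape, a document such as { "NPU": { "PERFORMANCE_HINT": "LATENCY" } } yields one ov::AnyMap keyed by device name, matching the load_config field added to GlobalContext later in this diff.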
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -645,7 +645,7 @@ typedef struct OrtOpenVINOProviderOptions {
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
unsigned char enable_npu_fast_compile;
const char* device_id;
size_t num_of_threads; ///< 0 = Use default number of threads
const char* cache_dir; // path is set to empty by default
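For context, the struct above is the legacy C-style options block; this PR removes the implementation behind enable_npu_fast_compile, and later commits retain the option only as a deprecated setting that logs a warning. A minimal sketch of filling the struct (field values and the include path are illustrative):

// Illustrative use of the legacy OrtOpenVINOProviderOptions struct; the exact
// include path depends on how the ONNX Runtime headers are installed.
#include <onnxruntime_c_api.h>

OrtOpenVINOProviderOptions MakeLegacyOpenVINOOptions() {
  OrtOpenVINOProviderOptions options{};
  options.device_type = "GPU_FP32";     // one of the values documented above
  options.enable_npu_fast_compile = 0;  // deprecated; ignored after this PR
  options.num_of_threads = 0;           // 0 = use the default number of threads
  options.cache_dir = "";               // empty by default, per the comment above
  return options;
}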
82 changes: 76 additions & 6 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -83,7 +83,8 @@
subgraph_context_.subgraph_name);
ie_cnn_network_ = exe_network_.Get().get_runtime_model();
} else if (global_context_.export_ep_ctx_blob &&
hw_target.find("NPU") != std::string::npos) {
hw_target.find("NPU") != std::string::npos &&
!global_context_.has_external_weights) {
std::shared_ptr<ov::Model> ov_model;
{
const std::string model = model_proto->SerializeAsString();
@@ -93,7 +94,8 @@
ov_model = global_context_.ie_core.Get().read_model(model, ov::Tensor());
}
exe_network_ = OVExeNetwork(global_context_.ie_core.Get().compile_model(ov_model, hw_target, device_config));
} else if ((!subgraph_context_.has_dynamic_input_shape) &&
} else if (!global_context_.has_external_weights &&
(!subgraph_context_.has_dynamic_input_shape) &&
((hw_target.find("AUTO") == std::string::npos) ||
(global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) > 2))) {
// Optimized OV compile_model API is supported with AUTO from version 2024.3 and above
@@ -178,6 +180,74 @@
}
#endif
}

if (!global_context_.load_config.empty()) {
const std::map<std::string, ov::AnyMap>& target_config = global_context_.load_config;

// Parse device types like "AUTO:CPU,GPU" and extract individual devices
auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
std::vector<std::string> devices;
auto delimiter_pos = device_type.find(':');
if (delimiter_pos != std::string::npos) {
std::stringstream str_stream(device_type.substr(delimiter_pos + 1));
std::string device;
while (std::getline(str_stream, device, ',')) {
devices.emplace_back(device);
}
} else {
devices.emplace_back(device_type);
}
return devices;
};

// Check if a property is supported and mutable
auto is_supported_and_mutable = [&](const std::string& key,
const std::vector<ov::PropertyName>& supported_config) -> bool {
auto it = std::find_if(supported_config.begin(), supported_config.end(), [&](const ov::PropertyName& property) {
return property == key && property.is_mutable();
});
return it != supported_config.end();
};

// Set a property if it is supported and mutable; otherwise log a warning and skip it
auto set_target_properties = [&](const std::string& device, const ov::AnyMap& config_options,
const std::vector<ov::PropertyName>& supported_properties) {

[GitHub Actions annotation, Optional Lint C++, basic_backend.cc line 214] cpplint (reviewdog): Add #include <vector> for vector<> [build/include_what_you_use] [4]
for (const auto& [key, value] : config_options) {
if (is_supported_and_mutable(key, supported_properties)) {
global_context_.ie_core.Get().set_property(device, ov::AnyMap{{key, value}});
} else {
LOGS_DEFAULT(WARNING) << "WARNING: Property \"" << key
<< "\" is either unsupported in current OpenVINO version"
<< " or property is immutable for target device \""
<< device << "\". Skipping setting this property.";
}
}
};

// Check if the device type is AUTO, HETERO, or MULTI
if (global_context_.device_type.find("AUTO") == 0 ||
global_context_.device_type.find("HETERO") == 0 ||
global_context_.device_type.find("MULTI") == 0) {
// Parse individual devices (e.g., "AUTO:CPU,GPU" -> ["CPU", "GPU"])
auto individual_devices = parse_individual_devices(global_context_.device_type);
// Set properties only for individual devices (e.g., "CPU", "GPU")
for (const std::string& device : individual_devices) {
if (target_config.count(device)) {
// Get supported properties for each individual device
auto device_properties = global_context_.ie_core.Get().get_property(device, ov::supported_properties);
// Set properties for the device
set_target_properties(device, target_config.at(device), device_properties);
}
}
} else {
if (target_config.count(global_context_.device_type)) {
auto supported_properties = global_context_.ie_core.Get().get_property(global_context_.device_type,
ov::supported_properties);
set_target_properties(global_context_.device_type,
target_config.at(global_context_.device_type), supported_properties);
}
}
}
}

void BasicBackend::EnableCaching(ov::AnyMap& device_config) {
@@ -275,7 +345,7 @@
input_tensor_shape[tensor_iter] = *i;
tensor_iter += 1;
}
auto input = graph_input_info.at(input_idx);
const auto& input = graph_input_info.at(input_idx);
OVTensorPtr tensor_ptr;
// avoid input copies on the CPU device
if (global_context_.device_type.find("CPU") != std::string::npos) {
@@ -316,7 +386,7 @@
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

try {
infer_request->SetTensor(input_name, ov_tensor_data.tensor_ptr);
infer_request->SetTensor(std::move(input_name), ov_tensor_data.tensor_ptr);
} catch (const char* msg) {
ORT_THROW(msg);
}
@@ -354,14 +424,14 @@
if ((it == ort_ov_tensor_map.end()) ||
(it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
ov_tensor_data_t ov_tensor_data;
auto output = graph_output_info.at(output_idx);
const auto& output = graph_output_info.at(output_idx);
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

try {
infer_request->SetTensor(output_name, ov_tensor_data.tensor_ptr);
infer_request->SetTensor(std::move(output_name), ov_tensor_data.tensor_ptr);
} catch (const char* msg) {
ORT_THROW(msg);
}
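The load_config handling added above splits composite device strings such as AUTO:CPU,GPU before applying per-device properties. A standalone sketch of that splitting step, lifted from the lambda in the diff so it can be exercised in isolation:

// Standalone version of the device-splitting lambda shown above:
// "AUTO:CPU,GPU" -> {"CPU", "GPU"}, while a plain device string such as "NPU"
// is returned unchanged.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> ParseIndividualDevices(const std::string& device_type) {
  std::vector<std::string> devices;
  const auto delimiter_pos = device_type.find(':');
  if (delimiter_pos != std::string::npos) {
    std::stringstream str_stream(device_type.substr(delimiter_pos + 1));
    std::string device;
    while (std::getline(str_stream, device, ',')) {
      devices.emplace_back(device);
    }
  } else {
    devices.emplace_back(device_type);
  }
  return devices;
}

int main() {
  for (const auto& device : ParseIndividualDevices("AUTO:CPU,GPU")) {
    std::cout << device << '\n';  // prints CPU, then GPU
  }
  return 0;
}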
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/openvino/contexts.h
@@ -4,6 +4,7 @@
#pragma once

#include <vector>
#include <map>
#include <unordered_map>
#include <string>
#include "core/providers/openvino/ov_interface.h"
@@ -15,18 +16,19 @@ namespace openvino_ep {
struct GlobalContext {
OVCore ie_core;
bool is_wholly_supported_graph = false;
bool enable_npu_fast_compile = false;
bool enable_opencl_throttling = false;
bool disable_dynamic_shapes = false;
bool ep_context_embed_mode = true;
bool export_ep_ctx_blob = false;
bool enable_qdq_optimizer = false;
bool disable_cpu_fallback = false;
bool has_external_weights = false;
size_t num_of_threads;
std::string device_type;
std::string precision_str;
std::string model_precision;
std::string cache_dir;
std::map<std::string, ov::AnyMap> load_config;
std::string model_priority = "DEFAULT";
int num_streams;
std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};

@@ -25,8 +25,8 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
global_context_ = std::make_unique<openvino_ep::GlobalContext>();
global_context_->device_type = info.device_type_;
global_context_->precision_str = info.precision_;
global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_;
global_context_->cache_dir = info.cache_dir_;
global_context_->load_config = info.load_config_;
global_context_->model_priority = info.model_priority_;
global_context_->num_streams = info.num_streams_;
global_context_->context = info.context_;
@@ -124,6 +124,7 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
result = obj.Execute();

global_context_->is_wholly_supported_graph = obj.IsWhollySupportedGraph();
global_context_->has_external_weights = obj.HasExternalWeights();

return result;
}

@@ -79,8 +79,8 @@ static std::vector<std::string> parseDevices(const std::string& device_string,
struct OpenVINOExecutionProviderInfo {
std::string device_type_{""};
std::string precision_{""};
bool enable_npu_fast_compile_{false};
size_t num_of_threads_{0};
std::map<std::string, ov::AnyMap> load_config_{};
std::string cache_dir_{""};
std::string model_priority_{""};
int num_streams_{1};
@@ -94,16 +94,18 @@

OpenVINOExecutionProviderInfo() = delete;

explicit OpenVINOExecutionProviderInfo(const std::string& dev_type, const std::string& precision,
bool enable_npu_fast_compile, size_t num_of_threads,
const std::string& cache_dir, const std::string& model_priority,
int num_streams, void* context, bool enable_opencl_throttling,
explicit OpenVINOExecutionProviderInfo(std::string dev_type, const std::string& precision,
size_t num_of_threads,
const std::map<std::string, ov::AnyMap>& load_config,
const std::string& cache_dir,
const std::string& model_priority, int num_streams,
void* context, bool enable_opencl_throttling,
bool disable_dynamic_shapes, bool export_ep_ctx_blob,
bool enable_qdq_optimizer, bool disable_cpu_fallback,
bool so_epctx_embed_mode)
: precision_(std::move(precision)),
enable_npu_fast_compile_(enable_npu_fast_compile),
num_of_threads_(num_of_threads),
load_config_(std::move(load_config)),
cache_dir_(std::move(cache_dir)),
model_priority_(std::move(model_priority)),
num_streams_(num_streams),
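From the application side, load_config replaces enable_npu_fast_compile in the provider options. A minimal sketch of passing it through the C++ API, assuming the AppendExecutionProvider_OpenVINO_V2 helper available in recent ONNX Runtime headers and assuming the key is load_config taking the path of the JSON file; both details are assumptions to verify against the release documentation:

// Sketch only: wiring the new load_config provider option from an application.
// The option key name and the expectation that it takes a JSON file path are
// assumptions based on this PR's commit messages, not a verified API contract.
#include <string>
#include <unordered_map>

#include <onnxruntime_cxx_api.h>

Ort::SessionOptions MakeOpenVINOSessionOptions() {
  Ort::SessionOptions session_options;
  std::unordered_map<std::string, std::string> provider_options{
      {"device_type", "NPU"},
      {"load_config", "ov_config.json"},  // per-device OpenVINO properties
  };
  session_options.AppendExecutionProvider_OpenVINO_V2(provider_options);
  return session_options;
}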