From 75322e94e4fef05097d22a9ebed052f11850d970 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 10 Sep 2024 18:01:34 +1000 Subject: [PATCH 01/28] Use Dawn libs directly to minimize binary size. Fix Release build errors. Fix Android build errors. --- .../external/onnxruntime_external_deps.cmake | 41 +++++++++++++++-- cmake/onnxruntime_providers_webgpu.cmake | 12 ++--- cmake/patches/dawn/dawn.patch | 12 +++++ .../core/providers/webgpu/buffer_manager.cc | 8 ++-- .../providers/webgpu/program_cache_key.cc | 5 +- .../core/providers/webgpu/program_manager.h | 2 +- .../core/providers/webgpu/shader_helper.cc | 46 +++++++++---------- .../core/providers/webgpu/webgpu_context.cc | 5 ++ 8 files changed, 89 insertions(+), 42 deletions(-) create mode 100644 cmake/patches/dawn/dawn.patch diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index a8ab4a53b9f3a..c2d5957a9910c 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -590,12 +590,45 @@ if (onnxruntime_USE_WEBGPU) dawn URL ${DEP_URL_dawn} URL_HASH SHA1=${DEP_SHA1_dawn} + PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch ) - set(DAWN_FETCH_DEPENDENCIES ON) - set(DAWN_ENABLE_INSTALL ON) - set(TINT_BUILD_TESTS OFF) - set(DAWN_USE_BUILT_DXC ON) + + # use dawn::native_objects and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size + set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE) + set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_NULL OFF CACHE BOOL "" FORCE) + set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE) + + # disable things we don't use set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF) + set(DAWN_ENABLE_DESKTOP_GL OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_OPENGLES OFF CACHE BOOL "" FORCE) + set(DAWN_SUPPORTS_GLFW_FOR_WINDOWING OFF 
CACHE BOOL "" FORCE) + set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE) + set(DAWN_USE_WINDOWS_UI OFF CACHE BOOL "" FORCE) + set(DAWN_USE_X11 OFF CACHE BOOL "" FORCE) + + set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE) # don't need. disabling is a large binary size saving + set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key + + # SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V. + if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") + set(DAWN_ENABLE_SPIRV_VALIDATION OFF CACHE BOOL "" FORCE) + endif() + + if (WIN32) + set(DAWN_USE_BUILT_DXC ON CACHE BOOL "" FORCE) + + # Vulkan may optionally be included in a Windows build. Exclude until we have an explicit use case that requires it. 
+ set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE) + endif() + onnxruntime_fetchcontent_makeavailable(dawn) endif() diff --git a/cmake/onnxruntime_providers_webgpu.cmake b/cmake/onnxruntime_providers_webgpu.cmake index 587c4b2c1ff2c..8d00ab5aa4494 100644 --- a/cmake/onnxruntime_providers_webgpu.cmake +++ b/cmake/onnxruntime_providers_webgpu.cmake @@ -24,14 +24,8 @@ source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webgpu_cc_srcs}) onnxruntime_add_static_library(onnxruntime_providers_webgpu ${onnxruntime_providers_webgpu_cc_srcs}) - onnxruntime_add_include_to_target(onnxruntime_providers_webgpu onnxruntime_common onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface) - target_link_libraries(onnxruntime_providers_webgpu dawn::webgpu_dawn) - - # Copy webgpu_dawn.dll to the output directory - add_custom_command( - TARGET onnxruntime_providers_webgpu - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "$" - VERBATIM ) + onnxruntime_add_include_to_target(onnxruntime_providers_webgpu + onnxruntime_common dawn::dawncpp_headers dawn::dawn_headers onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface) + target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native dawn::dawn_proc) set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime") diff --git a/cmake/patches/dawn/dawn.patch b/cmake/patches/dawn/dawn.patch new file mode 100644 index 0000000000000..33eb430329603 --- /dev/null +++ b/cmake/patches/dawn/dawn.patch @@ -0,0 +1,12 @@ +diff --git a/src/tint/api/BUILD.cmake b/src/tint/api/BUILD.cmake +index 0037d83276..6372c4ee77 100644 +--- a/src/tint/api/BUILD.cmake ++++ b/src/tint/api/BUILD.cmake +@@ -57,6 +57,7 @@ tint_target_add_dependencies(tint_api lib + tint_lang_wgsl_ast_transform + tint_lang_wgsl_common + tint_lang_wgsl_features ++ tint_lang_wgsl_inspector + tint_lang_wgsl_program + tint_lang_wgsl_sem + tint_lang_wgsl_writer_ir_to_program diff --git 
a/onnxruntime/core/providers/webgpu/buffer_manager.cc b/onnxruntime/core/providers/webgpu/buffer_manager.cc index da544e1d1ed60..8751338d24178 100644 --- a/onnxruntime/core/providers/webgpu/buffer_manager.cc +++ b/onnxruntime/core/providers/webgpu/buffer_manager.cc @@ -243,10 +243,10 @@ std::ostream& operator<<(std::ostream& os, BufferCacheMode mode) { BufferManager::BufferManager(WebGpuContext& context, BufferCacheMode storage_buffer_cache_mode, BufferCacheMode uniform_buffer_cache_mode, BufferCacheMode query_resolve_buffer_cache_mode) : context_{context}, - storage_cache_{std::move(CreateBufferCacheManager(storage_buffer_cache_mode))}, - uniform_cache_{std::move(CreateBufferCacheManager(uniform_buffer_cache_mode))}, - query_resolve_cache_{std::move(CreateBufferCacheManager(query_resolve_buffer_cache_mode))}, - default_cache_{std::move(CreateBufferCacheManager(BufferCacheMode::Disabled))} { + storage_cache_{CreateBufferCacheManager(storage_buffer_cache_mode)}, + uniform_cache_{CreateBufferCacheManager(uniform_buffer_cache_mode)}, + query_resolve_cache_{CreateBufferCacheManager(query_resolve_buffer_cache_mode)}, + default_cache_{CreateBufferCacheManager(BufferCacheMode::Disabled)} { } void BufferManager::Upload(void* src, WGPUBuffer dst, size_t size) { diff --git a/onnxruntime/core/providers/webgpu/program_cache_key.cc b/onnxruntime/core/providers/webgpu/program_cache_key.cc index 09a536f7916b2..6c7ef2bc89c6b 100644 --- a/onnxruntime/core/providers/webgpu/program_cache_key.cc +++ b/onnxruntime/core/providers/webgpu/program_cache_key.cc @@ -10,12 +10,14 @@ namespace webgpu { namespace { // append the info of an input or output to the cachekey -void AppendTensorInfo(std::ostringstream& ss, const Tensor& tensor, ProgramVariableDataType var_type, ProgramTensorMetadataDependency dependency, bool& first) { +void AppendTensorInfo(std::ostringstream& ss, const Tensor& tensor, ProgramVariableDataType var_type, ProgramTensorMetadataDependency dependency, + bool& first) { if 
(first) { first = false; } else { ss << '|'; } + if ((dependency & ProgramTensorMetadataDependency::Type) == ProgramTensorMetadataDependency::Type) { #ifndef NDEBUG // if debug build ss << var_type; @@ -24,6 +26,7 @@ void AppendTensorInfo(std::ostringstream& ss, const Tensor& tensor, ProgramVaria #endif ss << ';'; } + if ((dependency & ProgramTensorMetadataDependency::Shape) == ProgramTensorMetadataDependency::Shape) { ss D("Dims=") << tensor.Shape().ToString(); } else if ((dependency & ProgramTensorMetadataDependency::Rank) == ProgramTensorMetadataDependency::Rank) { diff --git a/onnxruntime/core/providers/webgpu/program_manager.h b/onnxruntime/core/providers/webgpu/program_manager.h index 782788910e3a5..83e5ff21c813c 100644 --- a/onnxruntime/core/providers/webgpu/program_manager.h +++ b/onnxruntime/core/providers/webgpu/program_manager.h @@ -30,7 +30,7 @@ class ProgramArtifact { const std::vector shape_uniform_ranks; ProgramArtifact(ProgramArtifact&&) = default; - ProgramArtifact& operator=(ProgramArtifact&&) = default; + ProgramArtifact& operator=(ProgramArtifact&&) = delete; // can't change const members. private: ORT_DISALLOW_COPY_AND_ASSIGNMENT(ProgramArtifact); diff --git a/onnxruntime/core/providers/webgpu/shader_helper.cc b/onnxruntime/core/providers/webgpu/shader_helper.cc index cd21f4752f300..be89efae5fc97 100644 --- a/onnxruntime/core/providers/webgpu/shader_helper.cc +++ b/onnxruntime/core/providers/webgpu/shader_helper.cc @@ -196,6 +196,29 @@ Status ValidateVariableDependency(ProgramTensorMetadataDependency dependency, Sh } } // namespace +Status ShaderHelper::ValidateVariable(const ProgramInput& input, const ShaderVariable& var) const { + ORT_RETURN_IF_ERROR(ValidateVariableDataType(input.tensor->GetElementType(), var.type_)); + ORT_RETURN_IF_ERROR(ValidateVariableShape(input.tensor->Shape(), + input.use_override_shape, + input.use_override_shape ? 
input.override_shape : input.tensor->Shape(), + var.num_components_)); + ORT_RETURN_IF_ERROR(ValidateVariableDependency(input.dependency, var.usage_, true)); + + return Status::OK(); +} +Status ShaderHelper::ValidateVariable(const ProgramOutput& output, const ShaderVariable& var) const { + ORT_RETURN_IF_ERROR(ValidateVariableDataType(output.tensor->GetElementType(), var.type_)); + ORT_RETURN_IF_ERROR(ValidateVariableShape(output.tensor->Shape(), + output.use_override_shape, + output.use_override_shape ? output.override_shape : output.tensor->Shape(), + var.num_components_)); + ORT_RETURN_IF_ERROR(ValidateVariableDependency(output.dependency, var.usage_, false)); + + return Status::OK(); +} + +#endif // NDEBUG + const ShaderVariable& ShaderHelper::AddVariableImpl(ProgramVariableScope scope, const std::string& name, ShaderVariable::Usage usage, @@ -224,27 +247,6 @@ const ShaderVariable& ShaderHelper::AddVariableImpl(ProgramVariableScope scope, return *var; } -Status ShaderHelper::ValidateVariable(const ProgramInput& input, const ShaderVariable& var) const { - ORT_RETURN_IF_ERROR(ValidateVariableDataType(input.tensor->GetElementType(), var.type_)); - ORT_RETURN_IF_ERROR(ValidateVariableShape(input.tensor->Shape(), - input.use_override_shape, - input.use_override_shape ? input.override_shape : input.tensor->Shape(), - var.num_components_)); - ORT_RETURN_IF_ERROR(ValidateVariableDependency(input.dependency, var.usage_, true)); - - return Status::OK(); -} -Status ShaderHelper::ValidateVariable(const ProgramOutput& output, const ShaderVariable& var) const { - ORT_RETURN_IF_ERROR(ValidateVariableDataType(output.tensor->GetElementType(), var.type_)); - ORT_RETURN_IF_ERROR(ValidateVariableShape(output.tensor->Shape(), - output.use_override_shape, - output.use_override_shape ? 
output.override_shape : output.tensor->Shape(), - var.num_components_)); - ORT_RETURN_IF_ERROR(ValidateVariableDependency(output.dependency, var.usage_, false)); - - return Status::OK(); -} - Status ShaderHelper::ValidateShapeForInputsAndOutputs() const { const auto& input_vars = vars_[static_cast(ProgramVariableScope::Input)]; const auto& output_vars = vars_[static_cast(ProgramVariableScope::Output)]; @@ -304,8 +306,6 @@ Status ShaderHelper::ValidateShapeForInputsAndOutputs() const { return Status::OK(); } -#endif - Status ShaderHelper::GenerateSourceCode(std::string& code, std::vector& shape_uniform_ranks) const { std::ostringstream ss; ss.imbue(std::locale::classic()); diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.cc b/onnxruntime/core/providers/webgpu/webgpu_context.cc index 276d74905adb7..0d994faeda472 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_context.cc @@ -4,6 +4,9 @@ #include #include +#include "dawn/dawn_proc.h" +#include "dawn/native/DawnNative.h" + #include "core/common/common.h" #include "core/providers/webgpu/compute_context.h" @@ -89,6 +92,8 @@ void WebGpuContext::Initialize(const WebGpuExecutionProviderInfo& webgpu_ep_info std::call_once(init_flag_, [this, &webgpu_ep_info]() { // Initialization.Step.1 - Create wgpu::Instance if (instance_ == nullptr) { + dawnProcSetProcs(&dawn::native::GetProcs()); + wgpu::InstanceDescriptor instance_desc{}; instance_desc.features.timedWaitAnyEnable = true; instance_ = wgpu::CreateInstance(&instance_desc); From e8ed35f3d64fc0909f91f259b781b6a5f856323b Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 10 Sep 2024 18:21:23 +1000 Subject: [PATCH 02/28] Fix Windows build --- cmake/external/onnxruntime_external_deps.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index c2d5957a9910c..ae16e4e0b9971 100644 --- 
a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -623,7 +623,9 @@ if (onnxruntime_USE_WEBGPU) endif() if (WIN32) + # building this requires the HLSL writer to be enabled in Tint. TBD if that we need either of these to be ON. set(DAWN_USE_BUILT_DXC ON CACHE BOOL "" FORCE) + set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "" FORCE) # Vulkan may optionally be included in a Windows build. Exclude until we have an explicit use case that requires it. set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE) From f4cbc7654d01c9e13e3c21568a39682d061a73fc Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Wed, 11 Sep 2024 13:55:51 +1000 Subject: [PATCH 03/28] Update patch with iOS build fixes. --- cmake/patches/dawn/dawn.patch | 54 +++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/cmake/patches/dawn/dawn.patch b/cmake/patches/dawn/dawn.patch index 33eb430329603..d696d386452e8 100644 --- a/cmake/patches/dawn/dawn.patch +++ b/cmake/patches/dawn/dawn.patch @@ -1,3 +1,57 @@ +diff --git a/src/dawn/native/CMakeLists.txt b/src/dawn/native/CMakeLists.txt +index 9c0bd6fa4e..bf8a57aeac 100644 +--- a/src/dawn/native/CMakeLists.txt ++++ b/src/dawn/native/CMakeLists.txt +@@ -857,6 +857,11 @@ if (DAWN_ENABLE_SWIFTSHADER) + target_compile_definitions(dawn_native PRIVATE "DAWN_ENABLE_SWIFTSHADER") + endif() + ++if (IOS) ++ target_compile_options(dawn_native_objects PRIVATE -fno-objc-arc) ++ target_compile_options(dawn_native PRIVATE -fno-objc-arc) ++endif() ++ + if (DAWN_BUILD_MONOLITHIC_LIBRARY) + ############################################################################### + # Do the 'complete_lib' build. 
+diff --git a/src/dawn/native/Surface_metal.mm b/src/dawn/native/Surface_metal.mm +index ce55acbd43..baa4835362 100644 +--- a/src/dawn/native/Surface_metal.mm ++++ b/src/dawn/native/Surface_metal.mm +@@ -36,7 +36,13 @@ + namespace dawn::native { + + bool InheritsFromCAMetalLayer(void* obj) { +- id object = static_cast(obj); ++ id object = ++#if TARGET_OS_IOS ++ (__bridge id)obj; ++#else ++ static_cast(obj); ++#endif ++ + return [object isKindOfClass:[CAMetalLayer class]]; + } + +diff --git a/src/dawn/native/metal/SharedFenceMTL.mm b/src/dawn/native/metal/SharedFenceMTL.mm +index bde8bfea07..f2f6459e91 100644 +--- a/src/dawn/native/metal/SharedFenceMTL.mm ++++ b/src/dawn/native/metal/SharedFenceMTL.mm +@@ -40,7 +40,13 @@ ResultOrError> SharedFence::Create( + DAWN_INVALID_IF(descriptor->sharedEvent == nullptr, "MTLSharedEvent is missing."); + if (@available(macOS 10.14, iOS 12.0, *)) { + return AcquireRef(new SharedFence( +- device, label, static_cast>(descriptor->sharedEvent))); ++ device, label, ++#if TARGET_OS_IOS ++ (__bridge id)(descriptor->sharedEvent) ++#else ++ static_cast>(descriptor->sharedEvent) ++#endif ++ )); + } else { + return DAWN_INTERNAL_ERROR("MTLSharedEvent not supported."); + } diff --git a/src/tint/api/BUILD.cmake b/src/tint/api/BUILD.cmake index 0037d83276..6372c4ee77 100644 --- a/src/tint/api/BUILD.cmake From e1e75b8659317cbe29685454ccd2476c520469bf Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Wed, 11 Sep 2024 17:30:33 +1000 Subject: [PATCH 04/28] WGSL writer is only needed when Vulkan is being used --- cmake/external/onnxruntime_external_deps.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index cd4baa2b67c08..657f9cf5d51eb 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -663,7 +663,7 @@ if (onnxruntime_USE_WEBGPU) set(TINT_BUILD_GLSL_VALIDATOR OFF 
CACHE BOOL "" FORCE) set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE) set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE) # don't need. disabling is a large binary size saving - set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key + set(TINT_BUILD_WGSL_WRITER OFF CACHE BOOL "" FORCE) # SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V. if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -679,6 +679,10 @@ if (onnxruntime_USE_WEBGPU) set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE) endif() + if (ANDROID) + set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key for Vulkan shader + endif() + onnxruntime_fetchcontent_makeavailable(dawn) endif() From afd202a9e5a33ef93c7567bc911acad0cd73a28e Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Fri, 13 Sep 2024 16:53:32 +1000 Subject: [PATCH 05/28] Fix build errors --- onnxruntime/core/providers/webgpu/webgpu_context.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.h b/onnxruntime/core/providers/webgpu/webgpu_context.h index 3251364e85ce3..f74dda38fca04 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.h +++ b/onnxruntime/core/providers/webgpu/webgpu_context.h @@ -110,11 +110,11 @@ class WebGpuContext final { : instance_{instance}, adapter_{adapter}, device_{device}, validation_mode_{validation_mode} {} ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(WebGpuContext); - std::vector WebGpuContext::GetEnabledAdapterToggles() const; - std::vector WebGpuContext::GetEnabledDeviceToggles() const; - std::vector WebGpuContext::GetDisabledDeviceToggles() const; - std::vector WebGpuContext::GetAvailableRequiredFeatures(const wgpu::Adapter& adapter) const; - wgpu::RequiredLimits WebGpuContext::GetRequiredLimits(const wgpu::Adapter& adapter) const; + std::vector GetEnabledAdapterToggles() const; + std::vector GetEnabledDeviceToggles() const; + std::vector GetDisabledDeviceToggles() 
const; + std::vector GetAvailableRequiredFeatures(const wgpu::Adapter& adapter) const; + wgpu::RequiredLimits GetRequiredLimits(const wgpu::Adapter& adapter) const; std::once_flag init_flag_; From bd25d1c1321a76160477cc7906e85e9ba4fdd894 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Fri, 13 Sep 2024 17:15:56 +1000 Subject: [PATCH 06/28] Fix transpose.cc build error. --- onnxruntime/core/providers/webgpu/tensor/transpose.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/webgpu/tensor/transpose.cc b/onnxruntime/core/providers/webgpu/tensor/transpose.cc index 68af858d515c2..86a9478a15b57 100644 --- a/onnxruntime/core/providers/webgpu/tensor/transpose.cc +++ b/onnxruntime/core/providers/webgpu/tensor/transpose.cc @@ -52,7 +52,7 @@ const std::string AppendPermFunction(gsl::span perm) { ss.imbue(std::locale::classic()); ss << "fn perm(i: y_indices_t)->x_indices_t {\n" " var a: x_indices_t;\n"; - for (auto i = 0; i < perm.size(); ++i) { + for (size_t i = 0; i < perm.size(); ++i) { ss << " a[" << perm[i] << "] = i[" << i << "];\n"; } ss << " return a;\n" From 3f9be822917f357d500413deb435dbd5d534592f Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Fri, 13 Sep 2024 17:35:58 +1000 Subject: [PATCH 07/28] Go back to WGSL writer being required on all builds --- cmake/external/onnxruntime_external_deps.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 657f9cf5d51eb..d515e117e0718 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -663,7 +663,7 @@ if (onnxruntime_USE_WEBGPU) set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE) set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE) set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE) # don't need. 
disabling is a large binary size saving - set(TINT_BUILD_WGSL_WRITER OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key. runtime error if not enabled. # SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V. if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -680,7 +680,6 @@ if (onnxruntime_USE_WEBGPU) endif() if (ANDROID) - set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key for Vulkan shader endif() onnxruntime_fetchcontent_makeavailable(dawn) From 788e129b27c657ec1d162ab7dcc65e401a9dea95 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Mon, 16 Sep 2024 09:21:07 +1000 Subject: [PATCH 08/28] Refine external libraries to add dependencies --- .../external/onnxruntime_external_deps.cmake | 67 +++++++++++++++++-- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index d515e117e0718..daec9ad75e061 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -575,10 +575,6 @@ if (onnxruntime_USE_MIMALLOC) onnxruntime_fetchcontent_makeavailable(mimalloc) endif() -#onnxruntime_EXTERNAL_LIBRARIES could contain onnx, onnx_proto,libprotobuf, cuda/cudnn, -# dnnl/mklml, onnxruntime_codegen_tvm, tvm and pthread -# pthread is always at the last -set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date ${ONNXRUNTIME_CLOG_TARGET_NAME}) # The source code of onnx_proto is generated, we must build this lib first before starting to compile the other source code that uses ONNX protobuf types. # The other libs do not have the problem. All the sources are already there. We can compile them in any order. 
set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto flatbuffers::flatbuffers) @@ -701,8 +697,69 @@ endif() if(onnxruntime_USE_SNPE) include(external/find_snpe.cmake) - list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS}) endif() +# add dependencies to the list of external libraries and populate onnxruntime_EXTERNAL_LIBRARIES with the result +function(add_dependencies_to_external_libs output_var) + set (external_libs ${ARGN}) + set(extended_deps) + + function(get_dependencies input_target) + message(STATUS "get_dependencies: ${input_target}") + get_target_property(alias ${input_target} ALIASED_TARGET) + if(TARGET ${alias}) + set(input_target ${alias}) + endif() + + if(${input_target} IN_LIST all_dependencies) + return() + endif() + + list(APPEND all_dependencies ${input_target}) + + get_target_property(link_libraries ${input_target} LINK_LIBRARIES) + foreach(dependency IN LISTS link_libraries) + if(TARGET ${dependency}) + get_dependencies(${dependency}) + endif() + endforeach() + + # get_target_property(link_libraries ${input_target} INTERFACE_LINK_LIBRARIES) + # foreach(dependency IN LISTS link_libraries) + # if(TARGET ${dependency}) + # get_dependencies(${dependency}) + # endif() + # endforeach() + + set(all_dependencies ${all_dependencies} PARENT_SCOPE) + endfunction() + + foreach(external_lib IN LISTS external_libs) + message(STATUS "### Getting dependencies for : ${external_lib}") + get_dependencies(${external_lib}) + endforeach() + + foreach(dependency IN LISTS all_dependencies) + get_target_property(type ${dependency} TYPE) + if((${type} STREQUAL "STATIC_LIBRARY" OR ${type} STREQUAL "OBJECT_LIBRARY") AND + NOT ${dependency} IN_LIST external_libs_extended) + list(APPEND extended_deps ${dependency}) + endif() + endforeach() + + set(${output_var} ${extended_deps} PARENT_SCOPE) +endfunction() + +# Create list of external libraries potentially added in this file. 
+set(_external_libraries ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${SNPE_NN_LIBS} ${WIL_TARGET} + dawn::dawn_native dawn::dawn_proc nlohmann_json::nlohmann_json + onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface + flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date + ${ONNXRUNTIME_CLOG_TARGET_NAME}) + +# add the dependencies as well. this is need in some places where we have to process the full list of libraries +# e.g. iOS pre-linking. +add_dependencies_to_external_libs(onnxruntime_EXTERNAL_LIBRARIES "${_external_libraries}") + FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR) FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR) From edf50561d4155a7dd8df21c653ca576d0aa727dc Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Mon, 16 Sep 2024 11:21:43 +1000 Subject: [PATCH 09/28] Try again --- cmake/external/helper_functions.cmake | 59 +++++++++++ .../external/onnxruntime_external_deps.cmake | 97 ++++--------------- 2 files changed, 80 insertions(+), 76 deletions(-) diff --git a/cmake/external/helper_functions.cmake b/cmake/external/helper_functions.cmake index e3f2211f96158..f16b7baeeb240 100644 --- a/cmake/external/helper_functions.cmake +++ b/cmake/external/helper_functions.cmake @@ -14,6 +14,65 @@ function(set_folder_for_subdir_targets srcDir folderName) endforeach() endfunction() +# Add a new library and it's dependencies to an existing list. +# The new library and any dependencies it has that are not already in the list will be prepended to the existing list. +# output_var will be set to the combined list. +# +# e.g. libA is new, and depends on libB and libC. libB is already in extended_list. 
+# add_dependencies_to_external_lib(libA output_var "${existing_dependencies}") # need to quote existing list values +# before: existing_dependencies = [libB] +# after: output_var = [libA, libC, libB] +function(add_dependencies_to_external_libs new_lib output_var) + set(existing_deps ${ARGN}) + set(new_deps) + + function(get_dependencies input_target) + get_target_property(alias ${input_target} ALIASED_TARGET) + if(TARGET ${alias}) + set(input_target ${alias}) + endif() + + # if this already exists we don't need to recurse any more + if(${input_target} IN_LIST existing_deps OR ${input_target} IN_LIST new_deps) + return() + endif() + + list(APPEND new_deps ${input_target}) + + get_target_property(link_libraries ${input_target} LINK_LIBRARIES) + foreach(dependency IN LISTS link_libraries) + if(TARGET ${dependency}) + get_dependencies(${dependency}) + endif() + endforeach() + + # Add if needed. As this is to primarily update the items to link against, interface libraries shouldn't be relevant + # get_target_property(link_libraries ${input_target} INTERFACE_LINK_LIBRARIES) + # foreach(dependency IN LISTS link_libraries) + # if(TARGET ${dependency}) + # get_dependencies(${dependency}) + # endif() + # endforeach() + + set(new_deps ${new_deps} PARENT_SCOPE) + endfunction() + + message(STATUS "### Getting dependencies for ${new_lib}") + get_dependencies(${new_lib}) + + set(combined_deps) + foreach(dependency IN LISTS new_deps) + get_target_property(type ${dependency} TYPE) + if(${type} STREQUAL "STATIC_LIBRARY" OR ${type} STREQUAL "OBJECT_LIBRARY") + list(APPEND combined_deps ${dependency}) + endif() + endforeach() + + list(APPEND combined_deps ${existing_deps}) + message(STATUS "Combined: ${combined_deps}") + set(${output_var} ${combined_deps} PARENT_SCOPE) +endfunction() + # This file was copied from cmake source with modifications: # 1. Add the EXCLUDE_FROM_ALL keyword when this function calls add_subdirectory. It will also resolve the # 'make install' issue. 
diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index daec9ad75e061..5930b292eaf75 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -575,6 +575,11 @@ if (onnxruntime_USE_MIMALLOC) onnxruntime_fetchcontent_makeavailable(mimalloc) endif() +set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json + onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface + flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date + ${ONNXRUNTIME_CLOG_TARGET_NAME}) + # The source code of onnx_proto is generated, we must build this lib first before starting to compile the other source code that uses ONNX protobuf types. # The other libs do not have the problem. All the sources are already there. We can compile them in any order. set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto flatbuffers::flatbuffers) @@ -675,91 +680,31 @@ if (onnxruntime_USE_WEBGPU) set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE) endif() - if (ANDROID) - endif() - onnxruntime_fetchcontent_makeavailable(dawn) -endif() -message(STATUS "Finished fetching external dependencies") - -set(onnxruntime_LINK_DIRS ) - -if (onnxruntime_USE_CUDA) - find_package(CUDAToolkit REQUIRED) - - if(onnxruntime_CUDNN_HOME) - file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME) - set(CUDNN_PATH ${onnxruntime_CUDNN_HOME}) - endif() - include(cuDNN) + # Add with dependencies in reverse order as new values are added at the front in each call + add_dependencies_to_external_libs(dawn::dawn_proc onnxruntime_EXTERNAL_LIBRARIES "${onnxruntime_EXTERNAL_LIBRARIES}") + add_dependencies_to_external_libs(dawn::native onnxruntime_EXTERNAL_LIBRARIES "${onnxruntime_EXTERNAL_LIBRARIES}") endif() +set(onnxruntime_LINK_DIRS) if(onnxruntime_USE_SNPE) - include(external/find_snpe.cmake) + include(external/find_snpe.cmake) + list(APPEND 
onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS}) endif() -# add dependencies to the list of external libraries and populate onnxruntime_EXTERNAL_LIBRARIES with the result -function(add_dependencies_to_external_libs output_var) - set (external_libs ${ARGN}) - set(extended_deps) - - function(get_dependencies input_target) - message(STATUS "get_dependencies: ${input_target}") - get_target_property(alias ${input_target} ALIASED_TARGET) - if(TARGET ${alias}) - set(input_target ${alias}) - endif() - - if(${input_target} IN_LIST all_dependencies) - return() - endif() - - list(APPEND all_dependencies ${input_target}) - - get_target_property(link_libraries ${input_target} LINK_LIBRARIES) - foreach(dependency IN LISTS link_libraries) - if(TARGET ${dependency}) - get_dependencies(${dependency}) - endif() - endforeach() - - # get_target_property(link_libraries ${input_target} INTERFACE_LINK_LIBRARIES) - # foreach(dependency IN LISTS link_libraries) - # if(TARGET ${dependency}) - # get_dependencies(${dependency}) - # endif() - # endforeach() - - set(all_dependencies ${all_dependencies} PARENT_SCOPE) - endfunction() - - foreach(external_lib IN LISTS external_libs) - message(STATUS "### Getting dependencies for : ${external_lib}") - get_dependencies(${external_lib}) - endforeach() - - foreach(dependency IN LISTS all_dependencies) - get_target_property(type ${dependency} TYPE) - if((${type} STREQUAL "STATIC_LIBRARY" OR ${type} STREQUAL "OBJECT_LIBRARY") AND - NOT ${dependency} IN_LIST external_libs_extended) - list(APPEND extended_deps ${dependency}) - endif() - endforeach() - - set(${output_var} ${extended_deps} PARENT_SCOPE) -endfunction() +if (onnxruntime_USE_CUDA) + find_package(CUDAToolkit REQUIRED) -# Create list of external libraries potentially added in this file. 
-set(_external_libraries ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${SNPE_NN_LIBS} ${WIL_TARGET} - dawn::dawn_native dawn::dawn_proc nlohmann_json::nlohmann_json - onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface - flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date - ${ONNXRUNTIME_CLOG_TARGET_NAME}) + if(onnxruntime_CUDNN_HOME) + file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME) + set(CUDNN_PATH ${onnxruntime_CUDNN_HOME}) + endif() -# add the dependencies as well. this is need in some places where we have to process the full list of libraries -# e.g. iOS pre-linking. -add_dependencies_to_external_libs(onnxruntime_EXTERNAL_LIBRARIES "${_external_libraries}") + include(cuDNN) +endif() FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR) FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR) + +message(STATUS "Finished fetching external dependencies") From 0ad21bfdf1287e15e12b679107c050cd872a99e5 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Mon, 16 Sep 2024 11:31:59 +1000 Subject: [PATCH 10/28] De-alias existing deps --- cmake/external/helper_functions.cmake | 14 ++++++++++++-- cmake/external/onnxruntime_external_deps.cmake | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cmake/external/helper_functions.cmake b/cmake/external/helper_functions.cmake index f16b7baeeb240..506c199ad28ce 100644 --- a/cmake/external/helper_functions.cmake +++ b/cmake/external/helper_functions.cmake @@ -23,9 +23,19 @@ endfunction() # before: existing_dependencies = [libB] # after: output_var = [libA, libC, libB] function(add_dependencies_to_external_libs new_lib output_var) - set(existing_deps ${ARGN}) + set(existing_deps_in ${ARGN}) set(new_deps) + # need to de-alias existing_deps + foreach(existing_dep IN LISTS existing_deps_in) + get_target_property(alias ${existing_dep} ALIASED_TARGET) + if(TARGET ${alias}) + list(APPEND existing_deps ${alias}) + else() + list(APPEND existing_deps ${existing_dep}) + 
endif() + endforeach() + function(get_dependencies input_target) get_target_property(alias ${input_target} ALIASED_TARGET) if(TARGET ${alias}) @@ -68,7 +78,7 @@ function(add_dependencies_to_external_libs new_lib output_var) endif() endforeach() - list(APPEND combined_deps ${existing_deps}) + list(APPEND combined_deps ${existing_deps_in}) message(STATUS "Combined: ${combined_deps}") set(${output_var} ${combined_deps} PARENT_SCOPE) endfunction() diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 5930b292eaf75..7bf42acf6827c 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -684,7 +684,7 @@ if (onnxruntime_USE_WEBGPU) # Add with dependencies in reverse order as new values are added at the front in each call add_dependencies_to_external_libs(dawn::dawn_proc onnxruntime_EXTERNAL_LIBRARIES "${onnxruntime_EXTERNAL_LIBRARIES}") - add_dependencies_to_external_libs(dawn::native onnxruntime_EXTERNAL_LIBRARIES "${onnxruntime_EXTERNAL_LIBRARIES}") + add_dependencies_to_external_libs(dawn::dawn_native onnxruntime_EXTERNAL_LIBRARIES "${onnxruntime_EXTERNAL_LIBRARIES}") endif() set(onnxruntime_LINK_DIRS) From ee1d958b83d912e109cc1cdcc9a87ea76161988d Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Sun, 15 Sep 2024 19:30:07 -0700 Subject: [PATCH 11/28] Fix C++20 errors --- include/onnxruntime/core/common/logging/logging.h | 9 +++++---- onnxruntime/core/common/logging/sinks/ostream_sink.cc | 3 ++- .../core/platform/apple/logging/apple_log_sink.mm | 4 +++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index 55b5c25d1a222..9a2d8dd71b049 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -58,10 +58,11 @@ namespace logging { using Timestamp = std::chrono::time_point; -// 
TODO: When other compilers support std::chrono::operator<<, update this. -// TODO: Check support for other compilers' version before enable C++20 for other compilers. -// Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4. -#if __cplusplus >= 202002L && __MAC_OS_X_VERSION_MAX_ALLOWED >= 140400L +// Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4, but the target macOS version must be +// >= 13.3 for it to be used. +#if __cplusplus >= 202002L && \ + (!defined(__MAC_OS_X_VERSION_MAX_ALLOWED) || __MAC_OS_X_VERSION_MAX_ALLOWED >= 140400L) && \ + (!defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED >= 130300L) namespace timestamp_ns = std::chrono; #else namespace timestamp_ns = ::date; diff --git a/onnxruntime/core/common/logging/sinks/ostream_sink.cc b/onnxruntime/core/common/logging/sinks/ostream_sink.cc index 033f4d2573cda..82af514ef3c63 100644 --- a/onnxruntime/core/common/logging/sinks/ostream_sink.cc +++ b/onnxruntime/core/common/logging/sinks/ostream_sink.cc @@ -45,7 +45,8 @@ void OStreamSink::SendImpl(const Timestamp& timestamp, const std::string& logger } #endif - msg << timestamp << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " + timestamp_ns::operator<<(msg, timestamp); // handle ambiguity with C++20 where date and std::chrono have operator<< + msg << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " << message.Location().ToString() << "] " << message.Message(); #ifndef ORT_MINIMAL_BUILD diff --git a/onnxruntime/core/platform/apple/logging/apple_log_sink.mm b/onnxruntime/core/platform/apple/logging/apple_log_sink.mm index 78614ffd2819d..88f2a828cf445 100644 --- a/onnxruntime/core/platform/apple/logging/apple_log_sink.mm +++ b/onnxruntime/core/platform/apple/logging/apple_log_sink.mm @@ -15,7 +15,9 @@ void AppleLogSink::SendImpl(const Timestamp& timestamp, const std::string& logger_id, const Capture& 
message) { using timestamp_ns::operator<<; std::ostringstream msg; - msg << timestamp << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " + + timestamp_ns::operator<<(msg, timestamp); // handle ambiguity with C++20 where date and std::chrono have operator<< + msg << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " << message.Location().ToString() << "] " << message.Message(); NSLog(@"%s", msg.str().c_str()); } From 3cb0c6a4de53a9da8e4026ca76c44ae30dff44f1 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Mon, 16 Sep 2024 12:59:44 +1000 Subject: [PATCH 12/28] Fix c++20 errors --- include/onnxruntime/core/common/logging/logging.h | 9 +++++---- onnxruntime/core/common/logging/sinks/ostream_sink.cc | 3 ++- .../core/platform/apple/logging/apple_log_sink.mm | 4 +++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index 55b5c25d1a222..9a2d8dd71b049 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -58,10 +58,11 @@ namespace logging { using Timestamp = std::chrono::time_point; -// TODO: When other compilers support std::chrono::operator<<, update this. -// TODO: Check support for other compilers' version before enable C++20 for other compilers. -// Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4. -#if __cplusplus >= 202002L && __MAC_OS_X_VERSION_MAX_ALLOWED >= 140400L +// Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4, but the target macOS version must be +// >= 13.3 for it to be used. 
+#if __cplusplus >= 202002L && \ + (!defined(__MAC_OS_X_VERSION_MAX_ALLOWED) || __MAC_OS_X_VERSION_MAX_ALLOWED >= 140400L) && \ + (!defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED >= 130300L) namespace timestamp_ns = std::chrono; #else namespace timestamp_ns = ::date; diff --git a/onnxruntime/core/common/logging/sinks/ostream_sink.cc b/onnxruntime/core/common/logging/sinks/ostream_sink.cc index 033f4d2573cda..82af514ef3c63 100644 --- a/onnxruntime/core/common/logging/sinks/ostream_sink.cc +++ b/onnxruntime/core/common/logging/sinks/ostream_sink.cc @@ -45,7 +45,8 @@ void OStreamSink::SendImpl(const Timestamp& timestamp, const std::string& logger } #endif - msg << timestamp << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " + timestamp_ns::operator<<(msg, timestamp); // handle ambiguity with C++20 where date and std::chrono have operator<< + msg << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " << message.Location().ToString() << "] " << message.Message(); #ifndef ORT_MINIMAL_BUILD diff --git a/onnxruntime/core/platform/apple/logging/apple_log_sink.mm b/onnxruntime/core/platform/apple/logging/apple_log_sink.mm index 78614ffd2819d..88f2a828cf445 100644 --- a/onnxruntime/core/platform/apple/logging/apple_log_sink.mm +++ b/onnxruntime/core/platform/apple/logging/apple_log_sink.mm @@ -15,7 +15,9 @@ void AppleLogSink::SendImpl(const Timestamp& timestamp, const std::string& logger_id, const Capture& message) { using timestamp_ns::operator<<; std::ostringstream msg; - msg << timestamp << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " + + timestamp_ns::operator<<(msg, timestamp); // handle ambiguity with C++20 where date and std::chrono have operator<< + msg << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " << message.Location().ToString() << "] " << message.Message(); 
NSLog(@"%s", msg.str().c_str()); } From 7b85ddaf6d2b8f957289c26273b57ba22479bd28 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Mon, 16 Sep 2024 18:18:15 +1000 Subject: [PATCH 13/28] Update some apple infra --- cmake/onnxruntime.cmake | 16 ++++++++++++++-- .../apple/build_and_assemble_apple_pods.py | 2 ++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 81f5c20070c81..bee2cabe7460b 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -89,10 +89,22 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK) # create Info.plist for the framework and podspec for CocoaPods (optional) set(MACOSX_FRAMEWORK_NAME "onnxruntime") set(MACOSX_FRAMEWORK_IDENTIFIER "com.microsoft.onnxruntime") - # Need to include CoreML as a weaklink for CocoaPods package if the EP is enabled + + # Setup weak frameworks for macOS/iOS. 'weak' as the CoreML or WebGPU EPs are optionally enabled. if(onnxruntime_USE_COREML) - set(APPLE_WEAK_FRAMEWORK "\\\"CoreML\\\"") + list(APPEND _weak_frameworks "\\\"CoreML\\\"") + endif() + + if(onnxruntime_USE_WEBGPU) + # TODO: Dawn includes all these. TBD if we need any others. As we're not doing anything graphical we may not. 
+ # Cocoa (MacOS only), Foundation, IOKit, IOSurface, QuartzCore + list(APPEND _weak_frameworks "\\\"Metal\\\"") endif() + + if (_weak_frameworks) + string(JOIN ", " APPLE_WEAK_FRAMEWORK ${_weak_frameworks}) + endif() + set(INFO_PLIST_PATH "${CMAKE_CURRENT_BINARY_DIR}/Info.plist") configure_file(${REPO_ROOT}/cmake/Info.plist.in ${INFO_PLIST_PATH}) configure_file( diff --git a/tools/ci_build/github/apple/build_and_assemble_apple_pods.py b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py index 71aeb9e7b0304..dd037c17ae3b3 100755 --- a/tools/ci_build/github/apple/build_and_assemble_apple_pods.py +++ b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py @@ -133,6 +133,8 @@ def main(): str(build_dir / "framework_out"), "--variant", package_variant.name, + "--test_project_stage_dir", # use a specific directory so it's easier to debug + str(build_dir / "test_apple_packages_staging"), ] run(test_apple_packages_args) From 1da2bcea0576f2b13c4907052ef6010217b8ac7e Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Mon, 16 Sep 2024 18:21:37 +1000 Subject: [PATCH 14/28] Enable webgpu in some configs to test via CI --- .../apple/default_full_apple_framework_build_settings.json | 1 + .../github/apple/default_full_ios_framework_build_settings.json | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json b/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json index 84d7e355ed5b4..6175ac3a0ad58 100644 --- a/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json +++ b/tools/ci_build/github/apple/default_full_apple_framework_build_settings.json @@ -19,6 +19,7 @@ "--build_apple_framework", "--use_coreml", "--use_xnnpack", + "--use_webgpu", "--skip_tests", "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF" ], diff --git a/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json 
b/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json index e2d8f70c02cf3..91646a8fbeb38 100644 --- a/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json +++ b/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json @@ -17,6 +17,7 @@ "--parallel", "--build_apple_framework", "--use_coreml", + "--use_webgpu", "--skip_tests", "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF" ], From 259aa5df90b506c7ccb6a63ca4d4c9c1102b9c8e Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Mon, 16 Sep 2024 18:25:05 +1000 Subject: [PATCH 15/28] Update one more CI --- .../azure-pipelines/templates/mac-cpu-packing-jobs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packing-jobs.yml b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packing-jobs.yml index 01ec3b5a2f8ca..045de0da1fee1 100644 --- a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packing-jobs.yml +++ b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packing-jobs.yml @@ -98,7 +98,7 @@ jobs: - template: mac-cpu-packaging-steps.yml parameters: MacosArch: ${{ parameters.MacosArch }} - AdditionalBuildFlags: ${{ parameters.AdditionalBuildFlags }} --use_coreml --cmake_extra_defines CMAKE_OSX_ARCHITECTURES="arm64;x86_64" + AdditionalBuildFlags: ${{ parameters.AdditionalBuildFlags }} --use_coreml --use_webgpu --cmake_extra_defines CMAKE_OSX_ARCHITECTURES="arm64;x86_64" BuildJava: false BuildNodejs: false WithCache: ${{ parameters.WithCache }} @@ -110,7 +110,7 @@ jobs: - template: mac-cpu-packaging-steps.yml parameters: MacosArch: ${{ parameters.MacosArch }} - AdditionalBuildFlags: ${{ parameters.AdditionalBuildFlags }} --build_nodejs --build_java --use_coreml --cmake_extra_defines CMAKE_OSX_ARCHITECTURES=arm64 + AdditionalBuildFlags: ${{ parameters.AdditionalBuildFlags }} --build_nodejs --build_java --use_coreml --use_webgpu --cmake_extra_defines 
CMAKE_OSX_ARCHITECTURES=arm64 BuildJava: true BuildNodejs: true WithCache: ${{ parameters.WithCache }} @@ -122,7 +122,7 @@ jobs: - template: mac-cpu-packaging-steps.yml parameters: MacosArch: ${{ parameters.MacosArch }} - AdditionalBuildFlags: ${{ parameters.AdditionalBuildFlags }} --build_nodejs --build_java --use_coreml + AdditionalBuildFlags: ${{ parameters.AdditionalBuildFlags }} --build_nodejs --build_java --use_coreml --use_webgpu BuildJava: true BuildNodejs: true WithCache: ${{ parameters.WithCache }} From 64ccd2de32ca70da2490abdfa5e91245bd51a2d8 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 17 Sep 2024 16:55:38 +1000 Subject: [PATCH 16/28] Fix some build and test issues --- cmake/external/helper_functions.cmake | 69 ------------------- .../external/onnxruntime_external_deps.cmake | 10 ++- cmake/onnxruntime.cmake | 49 +++++++++++-- .../main/java/ai/onnxruntime/OrtProvider.java | 4 +- .../webgpu/webgpu_execution_provider.cc | 4 +- 5 files changed, 54 insertions(+), 82 deletions(-) diff --git a/cmake/external/helper_functions.cmake b/cmake/external/helper_functions.cmake index 506c199ad28ce..e3f2211f96158 100644 --- a/cmake/external/helper_functions.cmake +++ b/cmake/external/helper_functions.cmake @@ -14,75 +14,6 @@ function(set_folder_for_subdir_targets srcDir folderName) endforeach() endfunction() -# Add a new library and it's dependencies to an existing list. -# The new library and any dependencies it has that are not already in the list will be prepended to the existing list. -# output_var will be set to the combined list. -# -# e.g. libA is new, and depends on libB and libC. libB is already in extended_list. 
-# add_dependencies_to_external_lib(libA output_var "${existing_dependencies}") # need to quote existing list values -# before: existing_dependencies = [libB] -# after: output_var = [libA, libC, libB] -function(add_dependencies_to_external_libs new_lib output_var) - set(existing_deps_in ${ARGN}) - set(new_deps) - - # need to de-alias existing_deps - foreach(existing_dep IN LISTS existing_deps_in) - get_target_property(alias ${existing_dep} ALIASED_TARGET) - if(TARGET ${alias}) - list(APPEND existing_deps ${alias}) - else() - list(APPEND existing_deps ${existing_dep}) - endif() - endforeach() - - function(get_dependencies input_target) - get_target_property(alias ${input_target} ALIASED_TARGET) - if(TARGET ${alias}) - set(input_target ${alias}) - endif() - - # if this already exists we don't need to recurse any more - if(${input_target} IN_LIST existing_deps OR ${input_target} IN_LIST new_deps) - return() - endif() - - list(APPEND new_deps ${input_target}) - - get_target_property(link_libraries ${input_target} LINK_LIBRARIES) - foreach(dependency IN LISTS link_libraries) - if(TARGET ${dependency}) - get_dependencies(${dependency}) - endif() - endforeach() - - # Add if needed. 
As this is to primarily update the items to link against, interface libraries shouldn't be relevant - # get_target_property(link_libraries ${input_target} INTERFACE_LINK_LIBRARIES) - # foreach(dependency IN LISTS link_libraries) - # if(TARGET ${dependency}) - # get_dependencies(${dependency}) - # endif() - # endforeach() - - set(new_deps ${new_deps} PARENT_SCOPE) - endfunction() - - message(STATUS "### Getting dependencies for ${new_lib}") - get_dependencies(${new_lib}) - - set(combined_deps) - foreach(dependency IN LISTS new_deps) - get_target_property(type ${dependency} TYPE) - if(${type} STREQUAL "STATIC_LIBRARY" OR ${type} STREQUAL "OBJECT_LIBRARY") - list(APPEND combined_deps ${dependency}) - endif() - endforeach() - - list(APPEND combined_deps ${existing_deps_in}) - message(STATUS "Combined: ${combined_deps}") - set(${output_var} ${combined_deps} PARENT_SCOPE) -endfunction() - # This file was copied from cmake source with modifications: # 1. Add the EXCLUDE_FROM_ALL keyword when this function calls add_subdirectory. It will also resolve the # 'make install' issue. 
diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 7bf42acf6827c..22aae32d05744 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -642,7 +642,7 @@ if (onnxruntime_USE_WEBGPU) PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch ) - # use dawn::native_objects and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size + # use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE) set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE) set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE) @@ -682,9 +682,7 @@ if (onnxruntime_USE_WEBGPU) onnxruntime_fetchcontent_makeavailable(dawn) - # Add with dependencies in reverse order as new values are added at the front in each call - add_dependencies_to_external_libs(dawn::dawn_proc onnxruntime_EXTERNAL_LIBRARIES "${onnxruntime_EXTERNAL_LIBRARIES}") - add_dependencies_to_external_libs(dawn::dawn_native onnxruntime_EXTERNAL_LIBRARIES "${onnxruntime_EXTERNAL_LIBRARIES}") + list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native dawn::dawn_proc) endif() set(onnxruntime_LINK_DIRS) @@ -704,7 +702,7 @@ if (onnxruntime_USE_CUDA) include(cuDNN) endif() -FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR) -FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR) +FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR) +FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR) message(STATUS "Finished fetching external dependencies") diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index bee2cabe7460b..2f4ffbb6adbd7 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -376,16 +376,57 @@ if(onnxruntime_BUILD_APPLE_FRAMEWORK) endif() endforeach() + set(_processed_libs) # keep track of 
processed libraries to skip any duplicate dependencies + function(add_symlink_for_static_lib_and_dependencies lib) + function(process cur_target) + # de-alias if applicable so a consistent target name is used + get_target_property(alias ${cur_target} ALIASED_TARGET) + if(TARGET ${alias}) + set(cur_target ${alias}) + endif() + + if(${cur_target} IN_LIST _processed_libs OR ${cur_target} IN_LIST lib_and_dependencies) + return() + endif() + + list(APPEND lib_and_dependencies ${cur_target}) + + get_target_property(link_libraries ${cur_target} LINK_LIBRARIES) + foreach(dependency ${link_libraries}) + if(TARGET ${dependency}) + process(${dependency}) + endif() + endforeach() + + set(lib_and_dependencies ${lib_and_dependencies} PARENT_SCOPE) + endfunction() + + set(lib_and_dependencies) + process(${lib}) + + foreach(_target ${lib_and_dependencies}) + get_target_property(type ${_target} TYPE) + if(${type} STREQUAL "STATIC_LIBRARY") + # message(STATUS "Adding symlink for ${_target}") + add_custom_command(TARGET onnxruntime POST_BUILD + COMMAND ${CMAKE_COMMAND} -E create_symlink + $ ${STATIC_LIB_DIR}/$) + endif() + endforeach() + + list(APPEND _processed_libs ${lib_and_dependencies}) + set(_processed_libs ${_processed_libs} PARENT_SCOPE) + endfunction() + # for external libraries we create a symlink to the .a file foreach(_LIB ${onnxruntime_EXTERNAL_LIBRARIES}) - if(NOT TARGET ${_LIB}) # if we didn't build from source. 
it may not a target + if(NOT TARGET ${_LIB}) # if we didn't build from source it may not be a target continue() endif() + GET_TARGET_PROPERTY(_LIB_TYPE ${_LIB} TYPE) if(_LIB_TYPE STREQUAL "STATIC_LIBRARY") - add_custom_command(TARGET onnxruntime POST_BUILD - COMMAND ${CMAKE_COMMAND} -E create_symlink - $ ${STATIC_LIB_DIR}/$) + add_symlink_for_static_lib_and_dependencies(${_LIB}) endif() endforeach() diff --git a/java/src/main/java/ai/onnxruntime/OrtProvider.java b/java/src/main/java/ai/onnxruntime/OrtProvider.java index ae9cb9f908629..b06f884896ee8 100644 --- a/java/src/main/java/ai/onnxruntime/OrtProvider.java +++ b/java/src/main/java/ai/onnxruntime/OrtProvider.java @@ -40,7 +40,9 @@ public enum OrtProvider { /** The XNNPACK execution provider. */ XNNPACK("XnnpackExecutionProvider"), /** The Azure remote endpoint execution provider. */ - AZURE("AzureExecutionProvider"); + AZURE("AzureExecutionProvider"), + /** The WebGPU execution provider */ + WEBGPU("WebGpuExecutionProvider"); private static final Map valueMap = new HashMap<>(values().length); diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc index d049cbbf64560..c689909e73c4b 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc @@ -115,7 +115,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Cosh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Asinh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Acosh); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Atanh); +// class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Atanh); TEMPORARY - Doesn't handle 1.0f -> inf with Metal class 
ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 6, 12, Tanh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, Tanh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 1, Not); @@ -428,7 +428,7 @@ std::unique_ptr RegisterKernels() { KERNEL_CREATE_INFO(9, Cosh), KERNEL_CREATE_INFO(9, Asinh), KERNEL_CREATE_INFO(9, Acosh), - KERNEL_CREATE_INFO(9, Atanh), + // KERNEL_CREATE_INFO(9, Atanh), TEMPORARY - Doesn't handle 1.0f -> inf with Metal KERNEL_CREATE_INFO_VERSIONED(6, 12, Tanh), KERNEL_CREATE_INFO(13, Tanh), // KERNEL_CREATE_INFO(1, Not), From ce23c21bd8b8cf2d6663cb4efd73f34820186aa3 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Wed, 18 Sep 2024 16:31:14 +1000 Subject: [PATCH 17/28] Expand check on whether std::chrono::operator<< can be used to cover catalyst Add additional required frameworks --- cmake/onnxruntime.cmake | 4 ++- .../onnxruntime/core/common/logging/logging.h | 35 +++++++++++++++---- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 2f4ffbb6adbd7..d601f15b3a3d8 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -97,7 +97,9 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK) if(onnxruntime_USE_WEBGPU) # TODO: Dawn includes all these. TBD if we need any others. As we're not doing anything graphical we may not. 
- # Cocoa (MacOS only), Foundation, IOKit, IOSurface, QuartzCore + # Cocoa (MacOS only), Foundation, IOKit + list(APPEND _weak_frameworks "\\\"QuartzCore\\\"") + list(APPEND _weak_frameworks "\\\"IOSurface\\\"") list(APPEND _weak_frameworks "\\\"Metal\\\"") endif() diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index 9a2d8dd71b049..d16def7b91cd4 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -58,16 +58,37 @@ namespace logging { using Timestamp = std::chrono::time_point; -// Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4, but the target macOS version must be -// >= 13.3 for it to be used. -#if __cplusplus >= 202002L && \ - (!defined(__MAC_OS_X_VERSION_MAX_ALLOWED) || __MAC_OS_X_VERSION_MAX_ALLOWED >= 140400L) && \ - (!defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED >= 130300L) -namespace timestamp_ns = std::chrono; +// C++20 has operator<< in std::chrono for Timestamp type but some mac builds have additional checks on the +// target deployment. +#define _USE_CXX20_STD_CHRONO __cplusplus >= 202002L + +// Apply constraints for mac builds +#if __APPLE__ + #include + // Catalyst check must be first as it has both TARGET_OS_MACCATALYST and TARGET_OS_MAC set + #if TARGET_OS_MACCATALYST + // maccatalyst requires version 16.3 + #if (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 160300) + #undef _USE_CXX20_STD_CHRONO + #endif + #elif TARGET_OS_MAC + // Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4, + // but the target macOS version must be >= 13.3 for it to be used. 
+ #if (defined(__MAC_OS_X_VERSION_MAX_ALLOWED) && __MAC_OS_X_VERSION_MAX_ALLOWED < 140400) || \ + (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 130300) + #undef _USE_CXX20_STD_CHRONO + #endif + #endif +#endif + +#if defined(_USE_CXX20_STD_CHRONO) + namespace timestamp_ns = std::chrono; #else -namespace timestamp_ns = ::date; + namespace timestamp_ns = ::date; #endif +#undef _USE_CXX20_STD_CHRONO + #ifndef NDEBUG ORT_ATTRIBUTE_UNUSED static bool vlog_enabled = true; // Set directly based on your needs. #else From 51db660147071f4ba6f4316c2465a3314f4bb92f Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Wed, 18 Sep 2024 17:10:26 +1000 Subject: [PATCH 18/28] Fix condition. Leave in some pragmas for debugging build failures short term --- .../onnxruntime/core/common/logging/logging.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index d16def7b91cd4..6384594c07123 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -81,10 +81,20 @@ using Timestamp = std::chrono::time_point; #endif #endif -#if defined(_USE_CXX20_STD_CHRONO) - namespace timestamp_ns = std::chrono; +#define STRINGIFY(x) STRINGIFY2(x) +#define STRINGIFY2(x) #x + +#pragma message("_USE_CXX20_STD_CHRONO is " STRINGIFY(_USE_CXX20_STD_CHRONO)) +#pragma message("TARGET_OS_MAC is " STRINGIFY(TARGET_OS_MAC)) +#pragma message("TARGET_OS_MACCATALYST is " STRINGIFY(TARGET_OS_MACCATALYST)) +#pragma message("__IPHONE_OS_VERSION_MIN_REQUIRED is " STRINGIFY(__IPHONE_OS_VERSION_MIN_REQUIRED)) + +#if _USE_CXX20_STD_CHRONO +namespace timestamp_ns = std::chrono; +#pragma message("Using std::chrono") #else - namespace timestamp_ns = ::date; +namespace timestamp_ns = ::date; +#pragma message("Using ::date") #endif #undef _USE_CXX20_STD_CHRONO From b712ebc5802ee5a87ff35a5b5db1e00d5464e653 
Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Wed, 18 Sep 2024 17:37:21 +1000 Subject: [PATCH 19/28] Add dummy header --- .../providers/webgpu/webgpu_provider_factory.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/onnxruntime/core/providers/webgpu/webgpu_provider_factory.h diff --git a/include/onnxruntime/core/providers/webgpu/webgpu_provider_factory.h b/include/onnxruntime/core/providers/webgpu/webgpu_provider_factory.h new file mode 100644 index 0000000000000..0b45b847d651f --- /dev/null +++ b/include/onnxruntime/core/providers/webgpu/webgpu_provider_factory.h @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// Dummy file to provide a signal in the ONNX Runtime C cocoapod as to whether the WebGPU EP was included in the build. +// If it was, this file will be included in the cocoapod, and a test like this can be used: +// +// #if __has_include() +// #define WEBGPU_EP_AVAILABLE 1 +// #else +// #define WEBGPU_EP_AVAILABLE 0 +// #endif + +// The WebGPU EP can be enabled via the generic SessionOptionsAppendExecutionProvider method, so no direct usage of +// the provider factory is required. 
From 45f3bbfa13eec78b3087961d45179f5bb992bb08 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Wed, 18 Sep 2024 17:52:40 +1000 Subject: [PATCH 20/28] Update apple uitest apps to run webgpu tests --- .../ios_package_uitest_cpp_api.mm | 27 ++++++++++++++---- .../macos_package_uitest_cpp_api.mm | 28 +++++++++++++++---- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm index d145a00b1348f..3783d684b891d 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm +++ b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm @@ -12,16 +12,20 @@ #include #if __has_include() -#define COREML_EP_AVAILABLE 1 + #define COREML_EP_AVAILABLE 1 + #include #else -#define COREML_EP_AVAILABLE 0 + #define COREML_EP_AVAILABLE 0 #endif -#if COREML_EP_AVAILABLE -#include +#if __has_include() + #define WEBGPU_EP_AVAILABLE 1 + // WebGPU EP doesn't require including the header as it's enabled via AddExecutionProvider +#else + #define WEBGPU_EP_AVAILABLE 0 #endif -void testSigmoid(const char* modelPath, bool useCoreML) { +void testSigmoid(const char* modelPath, bool useCoreML = false, bool useWebGPU = false) { // This is an e2e test for ORT C++ API Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "testCppAPI"); @@ -38,6 +42,12 @@ void testSigmoid(const char* modelPath, bool useCoreML) { (void)useCoreML; #endif + if (useWebGPU) { + std::unordered_map provider_options; + // set provider options if needed. e.g.
deviceId + session_options.OrtSessionOptionsAppendExecutionProvider("WebGPU", provider_options); + } + Ort::Session session(env, modelPath, session_options); size_t input_tensor_size = 3 * 4 * 5; @@ -96,7 +106,7 @@ - (NSString*)getFilePath { } - (void)testCppAPI_Basic { - testSigmoid([self getFilePath].UTF8String, false /* useCoreML */); + testSigmoid([self getFilePath].UTF8String); } #if COREML_EP_AVAILABLE @@ -105,4 +115,9 @@ - (void)testCppAPI_Basic_CoreML { } #endif +#if WEBGPU_EP_AVAILABLE +- (void)testCppAPI_Basic_WebGPU { + testSigmoid([self getFilePath].UTF8String, false /* useCoreML */, true /* useWebGPU */); +} +#endif @end diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm index 613c6e545939f..b9a7074593488 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm +++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm @@ -12,16 +12,20 @@ #include #if __has_include() -#define COREML_EP_AVAILABLE 1 + #define COREML_EP_AVAILABLE 1 + #include #else -#define COREML_EP_AVAILABLE 0 + #define COREML_EP_AVAILABLE 0 #endif -#if COREML_EP_AVAILABLE -#include +#if __has_include() + #define WEBGPU_EP_AVAILABLE 1 + // WebGPU EP doesn't require including the header as it's enabled via AddExecutionProvider +#else + #define WEBGPU_EP_AVAILABLE 0 #endif -void testSigmoid(const char* modelPath, bool useCoreML) { +void testSigmoid(const char* modelPath, bool useCoreML = false, bool useWebGPU = false) { // This is an e2e test for ORT C++ API Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "testCppAPI"); @@ -38,6 +42,12 @@ void testSigmoid(const char* modelPath, bool useCoreML) { (void)useCoreML; #endif + if (useWebGPU) { + std::unordered_map provider_options; + // set provider
options if needed. e.g. deviceId + session_options.OrtSessionOptionsAppendExecutionProvider("WebGPU", provider_options); + } + Ort::Session session(env, modelPath, session_options); size_t input_tensor_size = 3 * 4 * 5; @@ -96,7 +106,7 @@ - (NSString*)getFilePath { } - (void)testCppAPI_Basic { - testSigmoid([self getFilePath].UTF8String, false /* useCoreML */); + testSigmoid([self getFilePath].UTF8String); } #if COREML_EP_AVAILABLE @@ -105,4 +115,10 @@ - (void)testCppAPI_Basic_CoreML { } #endif +#if WEBGPU_EP_AVAILABLE +- (void)testCppAPI_Basic_WebGPU { + testSigmoid([self getFilePath].UTF8String, false /* useCoreML */, true /* useWebGPU */); +} +#endif + @end From 9b888af338ae51b4952f2fa5221d1b851525e8f5 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Wed, 18 Sep 2024 20:14:16 +1000 Subject: [PATCH 21/28] Disable WebGPU in mac-catalyst build. APIs used by Dawn are not available on catalyst. --- .../apple/default_full_ios_framework_build_settings.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json b/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json index 91646a8fbeb38..4c2c9442ab217 100644 --- a/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json +++ b/tools/ci_build/github/apple/default_full_ios_framework_build_settings.json @@ -17,7 +17,6 @@ "--parallel", "--build_apple_framework", "--use_coreml", - "--use_webgpu", "--skip_tests", "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF" ], @@ -25,12 +24,14 @@ "--ios", "--use_xcode", "--use_xnnpack", + "--use_webgpu", "--apple_deploy_target=13.0" ], "iphonesimulator": [ "--ios", "--use_xcode", "--use_xnnpack", + "--use_webgpu", "--apple_deploy_target=13.0" ], "macabi":[ From 210a760ade1e64062031a1441495d028a5138c80 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Thu, 19 Sep 2024 09:01:22 +1000 Subject: [PATCH 22/28] Fix AppendExecutionProvider call --- 
.../ios_package_testUITests/ios_package_uitest_cpp_api.mm | 2 +- .../macos_package_testUITests/macos_package_uitest_cpp_api.mm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm index 3783d684b891d..952209a1de244 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm +++ b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm @@ -45,7 +45,7 @@ void testSigmoid(const char* modelPath, bool useCoreML = false, bool useWebGPU = if (useWebGPU) { std::unordered_map provider_options; // set provider options if needed. e.g. deviceId - session_options.OrtSessionOptionsAppendExecutionProvider("WebGPU", provider_options); + session_options.AppendExecutionProvider("WebGPU", provider_options); } Ort::Session session(env, modelPath, session_options); diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm index b9a7074593488..807fee92144a1 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm +++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm @@ -45,7 +45,7 @@ void testSigmoid(const char* modelPath, bool useCoreML = false, bool useWebGPU = if (useWebGPU) { std::unordered_map provider_options; // set provider options if needed. e.g. 
deviceId - session_options.OrtSessionOptionsAppendExecutionProvider("WebGPU", provider_options); + session_options.AppendExecutionProvider("WebGPU", provider_options); } Ort::Session session(env, modelPath, session_options); From edb998034cd69f9196ea02ce35d1faed7e0629f3 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 24 Sep 2024 08:40:46 +1000 Subject: [PATCH 23/28] reduce some diffs --- cmake/external/onnxruntime_external_deps.cmake | 10 +++++----- cmake/onnxruntime.cmake | 2 -- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 22aae32d05744..6f54ce1b4face 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -686,11 +686,6 @@ if (onnxruntime_USE_WEBGPU) endif() set(onnxruntime_LINK_DIRS) -if(onnxruntime_USE_SNPE) - include(external/find_snpe.cmake) - list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS}) -endif() - if (onnxruntime_USE_CUDA) find_package(CUDAToolkit REQUIRED) @@ -702,6 +697,11 @@ if (onnxruntime_USE_CUDA) include(cuDNN) endif() +if(onnxruntime_USE_SNPE) + include(external/find_snpe.cmake) + list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS}) +endif() + FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR) FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 312370d96d04a..6c7b5cf2667dc 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -96,8 +96,6 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK) endif() if(onnxruntime_USE_WEBGPU) - # TODO: Dawn includes all these. TBD if we need any others. As we're not doing anything graphical we may not. 
- # Cocoa (MacOS only), Foundation, IOKit list(APPEND _weak_frameworks "\\\"QuartzCore\\\"") list(APPEND _weak_frameworks "\\\"IOSurface\\\"") list(APPEND _weak_frameworks "\\\"Metal\\\"") From 698e6ae1051100d8226b268824cdba373fe88292 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 24 Sep 2024 11:25:09 +1000 Subject: [PATCH 24/28] Enable in Android build for automated testing. Fix comment. --- .../ios_package_uitest_cpp_api.mm | 12 ++++++------ .../macos_package_uitest_cpp_api.mm | 12 ++++++------ .../android/default_full_aar_build_settings.json | 1 + 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm index 952209a1de244..0546d840471cc 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm +++ b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm @@ -12,17 +12,17 @@ #include #if __has_include() - #define COREML_EP_AVAILABLE 1 - #include +#define COREML_EP_AVAILABLE 1 +#include #else - #define COREML_EP_AVAILABLE 0 +#define COREML_EP_AVAILABLE 0 #endif #if __has_include() - #define WEBGPUL_EP_AVAILABLE 1 - // WebGPU EP doesn't require including the header as it's enabled via AddExecutionProvider +#define WEBGPUL_EP_AVAILABLE 1 +// WebGPU EP doesn't require including the header as it's enabled via AppendExecutionProvider #else - #define WEBGPU_EP_AVAILABLE 0 +#define WEBGPU_EP_AVAILABLE 0 #endif void testSigmoid(const char* modelPath, bool useCoreML = false, bool useWebGPU = false) { diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm index 
807fee92144a1..efdbd9b768ec0 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm +++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm @@ -12,17 +12,17 @@ #include #if __has_include() - #define COREML_EP_AVAILABLE 1 - #include +#define COREML_EP_AVAILABLE 1 +#include #else - #define COREML_EP_AVAILABLE 0 +#define COREML_EP_AVAILABLE 0 #endif #if __has_include() - #define WEBGPUL_EP_AVAILABLE 1 - // WebGPU EP doesn't require including the header as it's enabled via AddExecutionProvider +#define WEBGPUL_EP_AVAILABLE 1 +// WebGPU EP doesn't require including the header as it's enabled via AppendExecutionProvider #else - #define WEBGPU_EP_AVAILABLE 0 +#define WEBGPU_EP_AVAILABLE 0 #endif void testSigmoid(const char* modelPath, bool useCoreML = false, bool useWebGPU = false) { diff --git a/tools/ci_build/github/android/default_full_aar_build_settings.json b/tools/ci_build/github/android/default_full_aar_build_settings.json index b0eff75812673..f08f246748a5a 100644 --- a/tools/ci_build/github/android/default_full_aar_build_settings.json +++ b/tools/ci_build/github/android/default_full_aar_build_settings.json @@ -16,6 +16,7 @@ "--build_shared_lib", "--use_nnapi", "--use_xnnpack", + "--use_webgpu", "--skip_tests" ] } From b9e98a79e395a81c81e42069a8b06577c956b1f4 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 24 Sep 2024 11:50:10 +1000 Subject: [PATCH 25/28] Fix typo in #define --- .../ios_package_testUITests/ios_package_uitest_cpp_api.mm | 2 +- .../macos_package_testUITests/macos_package_uitest_cpp_api.mm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm index 0546d840471cc..32b4b32e299d6 100644 --- 
a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm +++ b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm @@ -19,7 +19,7 @@ #endif #if __has_include() -#define WEBGPUL_EP_AVAILABLE 1 +#define WEBGPU_EP_AVAILABLE 1 // WebGPU EP doesn't require including the header as it's enabled via AppendExecutionProvider #else #define WEBGPU_EP_AVAILABLE 0 diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm index efdbd9b768ec0..86001b6cb50a5 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm +++ b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm @@ -19,7 +19,7 @@ #endif #if __has_include() -#define WEBGPUL_EP_AVAILABLE 1 +#define WEBGPU_EP_AVAILABLE 1 // WebGPU EP doesn't require including the header as it's enabled via AppendExecutionProvider #else #define WEBGPU_EP_AVAILABLE 0 From 4b55f23d1bff28932442b07afc487687681a5cca Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 24 Sep 2024 15:19:01 +1000 Subject: [PATCH 26/28] Fix some macos warnings. 
--- onnxruntime/core/providers/webgpu/shader_variable.h | 3 +++ onnxruntime/core/providers/webgpu/tensor/where.cc | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/providers/webgpu/shader_variable.h b/onnxruntime/core/providers/webgpu/shader_variable.h index 326c6814410de..ce68fc04993e6 100644 --- a/onnxruntime/core/providers/webgpu/shader_variable.h +++ b/onnxruntime/core/providers/webgpu/shader_variable.h @@ -67,6 +67,9 @@ class ShaderIndicesHelper { public: ShaderIndicesHelper(std::string_view name, ProgramVariableDataType type, ShaderUsage usage, const TensorShape& dims); + ShaderIndicesHelper(ShaderIndicesHelper&&) = default; + ShaderIndicesHelper& operator=(ShaderIndicesHelper&&) = default; + inline int NumComponents() const { return num_components_; } // create a WGSL expression ({varname}_indices_t) for getting indices from offset. diff --git a/onnxruntime/core/providers/webgpu/tensor/where.cc b/onnxruntime/core/providers/webgpu/tensor/where.cc index 31806a0af1741..1d58538a7489c 100644 --- a/onnxruntime/core/providers/webgpu/tensor/where.cc +++ b/onnxruntime/core/providers/webgpu/tensor/where.cc @@ -59,7 +59,7 @@ Status WhereProgram::GenerateShaderCode(ShaderHelper& shader) const { const auto& b_input = shader.AddInput("b_data", ShaderUsage::UseUniform); const auto& output = shader.AddOutput("output_data", ShaderUsage::UseUniform); - auto expression = [](const std::string& a, const std::string& b, const std::string& c) -> const auto { + const auto expression = [](const std::string& a, const std::string& b, const std::string& c) -> auto { return "select(" + b + ", " + a + ", " + c + ")"; }; std::string assignment; @@ -74,10 +74,10 @@ Status WhereProgram::GenerateShaderCode(ShaderHelper& shader) const { const auto& b_indices = shader.AddIndices("b_indices"); const auto& output_indices = shader.AddIndices("output_indices"); - auto single_assignment = + const auto single_assignment = [&expression, &output_indices, 
&a_indices, &b_indices, &c_indices]( const std::string& rest_str, const std::string& x, const std::string& type_cast = "") - -> const auto { + -> auto { const std::string a_expression = "a_data[index_a" + x + "][component_a" + x + "]"; const std::string b_expression = "b_data[index_b" + x + "][component_b" + x + "]"; const std::string c_expression = "bool(c_data[index_c" + x + "] & (0xffu << (component_c" + x + " * 8)))"; From af7c39ea9d3798405fd64d58bd592979732487e0 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Wed, 25 Sep 2024 13:13:43 +1000 Subject: [PATCH 27/28] Fix ATanH on Metal --- .../providers/webgpu/math/unary_elementwise_ops.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/onnxruntime/core/providers/webgpu/math/unary_elementwise_ops.cc b/onnxruntime/core/providers/webgpu/math/unary_elementwise_ops.cc index 3b43c87fb0c82..9e8117aa34a92 100644 --- a/onnxruntime/core/providers/webgpu/math/unary_elementwise_ops.cc +++ b/onnxruntime/core/providers/webgpu/math/unary_elementwise_ops.cc @@ -165,7 +165,19 @@ WEBGPU_ELEMENTWISE_KERNEL(Asinh, 9, WebGpuSupportedFloatTypes()) WEBGPU_ELEMENTWISE_IMPL(Acosh, "acosh(a)") WEBGPU_ELEMENTWISE_KERNEL(Acosh, 9, WebGpuSupportedFloatTypes()) +#if __APPLE__ +// Metal returns 0 for values >= 1.0. 
+// Need custom impl to return +inf for 1.0 (by dividing 1 by 0), and NaN for > 1.0 (by dividing 0 by 0) +WEBGPU_ELEMENTWISE_IMPL(Atanh, + "select(" + " select(x_value_t(1.0), x_value_t(0.0), a > x_value_t(1.0)) / x_value_t(0.0)," + " atanh(a)," + " a < x_value_t(1.0))", + "", + ShaderUsage::UseValueTypeAlias) +#else WEBGPU_ELEMENTWISE_IMPL(Atanh, "atanh(a)") +#endif WEBGPU_ELEMENTWISE_KERNEL(Atanh, 9, WebGpuSupportedFloatTypes()) WEBGPU_ELEMENTWISE_IMPL(Not, "!a") From 770023f05d4c98ac958da77377d2a9b80f2c1e57 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Thu, 26 Sep 2024 09:45:09 +1000 Subject: [PATCH 28/28] Minor cleanups --- cmake/onnxruntime.cmake | 1 + .../core/providers/webgpu/webgpu_execution_provider.cc | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 6c7b5cf2667dc..b1d797ca16adc 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -376,6 +376,7 @@ if(onnxruntime_BUILD_APPLE_FRAMEWORK) endif() endforeach() + # helper function that recurses to also handle static library dependencies of the ORT external libraries set(_processed_libs) # keep track of processed libraries to skip any duplicate dependencies function(add_symlink_for_static_lib_and_dependencies lib) function(process cur_target) diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc index ea0615c0f4017..f5d66d6a24134 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc @@ -115,7 +115,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Cosh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Asinh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Acosh); -// 
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Atanh); TEMPORARY - Doesn't handle 1.0f -> inf with Metal +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, Atanh); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 6, 12, Tanh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, Tanh); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 1, Not); @@ -435,7 +435,7 @@ std::unique_ptr RegisterKernels() { KERNEL_CREATE_INFO(9, Cosh), KERNEL_CREATE_INFO(9, Asinh), KERNEL_CREATE_INFO(9, Acosh), - // KERNEL_CREATE_INFO(9, Atanh), TEMPORARY - Doesn't handle 1.0f -> inf with Metal + KERNEL_CREATE_INFO(9, Atanh), KERNEL_CREATE_INFO_VERSIONED(6, 12, Tanh), KERNEL_CREATE_INFO(13, Tanh), KERNEL_CREATE_INFO(1, Not),