From 22ae97c7dc7822eb19cafa0bd9bfee67da3075ec Mon Sep 17 00:00:00 2001 From: Yulong Wang <7679871+fs-eire@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:19:43 -0800 Subject: [PATCH 01/13] [webgpu] Add Alias def for Flatten (#23038) ### Description Add `Alias` definition for Flatten in WebGPU EP. also add int32/uint32 in type constraint T. --- .../core/providers/webgpu/tensor/flatten.cc | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/webgpu/tensor/flatten.cc b/onnxruntime/core/providers/webgpu/tensor/flatten.cc index 81d28bd3c0fa7..11ded865b6be2 100644 --- a/onnxruntime/core/providers/webgpu/tensor/flatten.cc +++ b/onnxruntime/core/providers/webgpu/tensor/flatten.cc @@ -13,7 +13,10 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX( kOnnxDomain, 1, 8, kWebGpuExecutionProvider, - (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + (*KernelDefBuilder::Create()) + .Alias(0, 0) + .TypeConstraint("T", WebGpuSupportedNumberTypes()) + .InputMemoryType(OrtMemTypeCPU, 1), Flatten); ONNX_OPERATOR_VERSIONED_KERNEL_EX( @@ -21,7 +24,10 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX( kOnnxDomain, 9, 10, kWebGpuExecutionProvider, - (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + (*KernelDefBuilder::Create()) + .Alias(0, 0) + .TypeConstraint("T", WebGpuSupportedNumberTypes()) + .InputMemoryType(OrtMemTypeCPU, 1), Flatten); ONNX_OPERATOR_VERSIONED_KERNEL_EX( @@ -29,7 +35,10 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX( kOnnxDomain, 11, 12, kWebGpuExecutionProvider, - (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + (*KernelDefBuilder::Create()) + .Alias(0, 0) + .TypeConstraint("T", WebGpuSupportedNumberTypes()) + .InputMemoryType(OrtMemTypeCPU, 1), Flatten); ONNX_OPERATOR_VERSIONED_KERNEL_EX( @@ -37,7 +46,10 @@ ONNX_OPERATOR_VERSIONED_KERNEL_EX( kOnnxDomain, 13, 20, kWebGpuExecutionProvider, - (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + (*KernelDefBuilder::Create()) + .Alias(0, 0) + .TypeConstraint("T", WebGpuSupportedNumberTypes()) + .InputMemoryType(OrtMemTypeCPU, 1), Flatten); ONNX_OPERATOR_KERNEL_EX( @@ -45,8 +57,11 @@ ONNX_OPERATOR_KERNEL_EX( kOnnxDomain, 21, kWebGpuExecutionProvider, - (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + (*KernelDefBuilder::Create()) + .Alias(0, 0) + .TypeConstraint("T", WebGpuSupportedNumberTypes()) + .InputMemoryType(OrtMemTypeCPU, 1), Flatten); } // namespace webgpu -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime From 2f2c73bdde98727bd8a81107dd70a321e03aac9b Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 10 Dec 2024 08:24:16 +1000 Subject: [PATCH 02/13] Miscellaneous cleanups (#23048) ### Description - fix some missing end of version markers and since_version info - fix include to use onnx_protobuf.h which handles minimal builds - we should always prefer that header over directly using the onnx ones ### Motivation and Context --- .../core/session/onnxruntime_c_api.h | 19 +++++++++++++++++++ .../graph/contrib_ops/quantization_defs.cc | 15 ++++++++------- onnxruntime/core/session/onnxruntime_c_api.cc | 5 +++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h 
b/include/onnxruntime/core/session/onnxruntime_c_api.h index 8e881c757f9ac..a35d975ac8f1b 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4615,6 +4615,8 @@ struct OrtApi { * \param[in] num_keys * * \snippet{doc} snippets.dox OrtStatus Return Value + * + * \since Version 1.17. */ ORT_API2_STATUS(SessionOptionsAppendExecutionProvider_OpenVINO_V2, _In_ OrtSessionOptions* options, @@ -4632,6 +4634,8 @@ struct OrtApi { * \param[in] num_keys * * \snippet{doc} snippets.dox OrtStatus Return Value + * + * \since Version 1.18. */ ORT_API2_STATUS(SessionOptionsAppendExecutionProvider_VitisAI, _In_ OrtSessionOptions* options, @@ -4645,7 +4649,10 @@ struct OrtApi { * \param[in] mem_info OrtMemoryInfo instance * \param[in] count_or_bytes How many bytes is this scratch buffer * \param[out] out A pointer to the scrach buffer + * * \snippet{doc} snippets.dox OrtStatus Return Value + * + * \since Version 1.18. */ ORT_API2_STATUS(KernelContext_GetScratchBuffer, _In_ const OrtKernelContext* context, _In_ const OrtMemoryInfo* mem_info, _In_ size_t count_or_bytes, _Outptr_ void** out); @@ -4656,6 +4663,8 @@ struct OrtApi { * \param[out] out A pointer to OrtAllocator * * \snippet{doc} snippets.dox OrtStatus Return Value + * + * \since Version 1.18. */ ORT_API2_STATUS(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ OrtMemType mem_type, _Outptr_ OrtAllocator** out); @@ -4677,6 +4686,8 @@ struct OrtApi { * \param[in] num_external_initializer_files Number of external files * * \snippet{doc} snippets.dox OrtStatus Return Value + * + * \since Version 1.18. */ ORT_API2_STATUS(AddExternalInitializersFromFilesInMemory, _In_ OrtSessionOptions* options, _In_reads_(num_external_initializer_files) const ORTCHAR_T* const* external_initializer_file_names, @@ -4699,6 +4710,8 @@ struct OrtApi { * OrtApi::ReleaseLoraAdapter. * * \snippet{doc} snippets.dox OrtStatus Return Value + * + * \since Version 1.20. */ ORT_API2_STATUS(CreateLoraAdapter, const ORTCHAR_T* adapter_file_path, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** out); @@ -4717,6 +4730,8 @@ struct OrtApi { * OrtApi::ReleaseLoraAdapter. * * \snippet{doc} snippets.dox OrtStatus Return Value + * + * \since Version 1.20. */ ORT_API2_STATUS(CreateLoraAdapterFromArray, _In_ const void* bytes, size_t num_bytes, _In_ OrtAllocator* allocator, _Outptr_ OrtLoraAdapter** out); @@ -4738,6 +4753,8 @@ struct OrtApi { * \param[in] adapter OrtLoraAdapter instance * * \snippet{doc} snippets.dox OrtStatus Return Value + * + * \since Version 1.20. */ ORT_API2_STATUS(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter); @@ -4756,6 +4773,8 @@ struct OrtApi { * \param[in] kv_len Number of elements in the keys and values arrays * * \snippet{doc} snippets.dox OrtStatus Return Value + * + * \since Version 1.20. 
*/ ORT_API2_STATUS(SetEpDynamicOptions, _Inout_ OrtSession* sess, _In_reads_(kv_len) const char* const* keys, _In_reads_(kv_len) const char* const* values, _In_ size_t kv_len); diff --git a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc index 6f1f1c831d191..5a3cd86b04492 100644 --- a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc @@ -9,7 +9,7 @@ #include "core/graph/constants.h" #include "core/graph/contrib_ops/contrib_defs.h" #include "core/graph/contrib_ops/shape_inference_functions.h" -#include "onnx/onnx-ml.pb.h" // ? +#include "core/graph/onnx_protobuf.h" // Suppress a warning: global initializer calls a non-constexpr function 'symbol' which is from // ONNX_OPERATOR_SET_SCHEMA_EX macro and only happens in debug build @@ -23,7 +23,7 @@ void convTransposeShapeInference(InferenceContext& ctx); void convPoolShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, bool use_dilation, bool require_kernel_shape, int input1Idx, int input2Idx); namespace defs::math::utils { - void MatMulShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, int input1Idx, int input2Idx); +void MatMulShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, int input1Idx, int input2Idx); } } // namespace ONNX_NAMESPACE @@ -822,10 +822,10 @@ ONNX_MS_OPERATOR_SET_SCHEMA( } } - if (all_lengths_known) { - output_shape->mutable_dim(axis)->set_dim_value(total_length); - } - })); + if (all_lengths_known) { + output_shape->mutable_dim(axis)->set_dim_value(total_length); + } + })); ONNX_MS_OPERATOR_SET_SCHEMA(QLinearWhere, 1, OpSchema() .SetDoc("Return elements, either from X or Y, depending on condition.") @@ -955,7 +955,8 @@ ONNX_MS_OPERATOR_SET_SCHEMA( AttributeProto::INT, static_cast(0)) .Attr("do_rotary", "Whether to use rotary position embedding. Default value is 0.", AttributeProto::INT, OPTIONAL_VALUE) - .Attr("past_present_share_buffer", "Corresponding past and present are same tensor, its shape is " + .Attr("past_present_share_buffer", + "Corresponding past and present are same tensor, its shape is " "(2, batch_size, num_heads, max_sequence_length, head_size)", AttributeProto::INT, OPTIONAL_VALUE) .Attr("mask_filter_value", diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 109445c877786..ca6950af0227a 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2803,12 +2803,15 @@ static constexpr OrtApi ort_api_1_to_21 = { &OrtApis::KernelInfoGetAllocator, &OrtApis::AddExternalInitializersFromFilesInMemory, // End of Version 18 - DO NOT MODIFY ABOVE (see above text for more information) + // End of Version 19 - DO NOT MODIFY ABOVE (see above text for more information) + &OrtApis::CreateLoraAdapter, &OrtApis::CreateLoraAdapterFromArray, &OrtApis::ReleaseLoraAdapter, &OrtApis::RunOptionsAddActiveLoraAdapter, &OrtApis::SetEpDynamicOptions, + // End of Version 20 - DO NOT MODIFY ABOVE (see above text for more information) }; // OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase. 
@@ -2840,6 +2843,8 @@ static_assert(offsetof(OrtApi, GetBuildInfoString) / sizeof(void*) == 254, "Size static_assert(offsetof(OrtApi, KernelContext_GetResource) / sizeof(void*) == 265, "Size of version 16 API cannot change"); static_assert(offsetof(OrtApi, SessionOptionsAppendExecutionProvider_OpenVINO_V2) / sizeof(void*) == 275, "Size of version 17 API cannot change"); static_assert(offsetof(OrtApi, AddExternalInitializersFromFilesInMemory) / sizeof(void*) == 279, "Size of version 18 API cannot change"); +// no additions in version 19 +static_assert(offsetof(OrtApi, SetEpDynamicOptions) / sizeof(void*) == 284, "Size of version 20 API cannot change"); // So that nobody forgets to finish an API version, this check will serve as a reminder: static_assert(std::string_view(ORT_VERSION) == "1.21.0", From 8f3384b4c130fcf98b3db7689a3cef4d7497e726 Mon Sep 17 00:00:00 2001 From: amancini-N <63410090+amancini-N@users.noreply.github.com> Date: Tue, 10 Dec 2024 00:15:20 +0100 Subject: [PATCH 03/13] Fix BeamSearch T5 if initializers are on outer scope (#23044) ### Description This PR adds the logic needed to consider only the needed implicit inputs on BeamSearch op in case of T5 model (encoder/decoder, 2 graphs). The logic added is similar to what happens in the _If_ kernel setup. ### Motivation and Context Fixes #23043 --- .../cpu/transformers/subgraph_base.cc | 15 +++++++-- .../cpu/transformers/subgraph_base.h | 1 + .../cpu/transformers/subgraph_t5_decoder.cc | 7 ++-- .../cpu/transformers/subgraph_t5_encoder.cc | 7 ++-- .../test/contrib_ops/beam_search_test.cc | 30 ++++++++++++++++++ onnxruntime/test/testdata/dummy_t5.onnx | Bin 0 -> 6815 bytes ...ummy_t5_with_outer_scope_initializers.onnx | Bin 0 -> 7135 bytes 7 files changed, 54 insertions(+), 6 deletions(-) create mode 100644 onnxruntime/test/testdata/dummy_t5.onnx create mode 100644 onnxruntime/test/testdata/dummy_t5_with_outer_scope_initializers.onnx diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.cc b/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.cc index d675ba742e03b..7757435990a65 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.cc @@ -31,6 +31,7 @@ Subgraph::Subgraph( allocator_(nullptr), is_output_float16_(false) { num_implicit_inputs = static_cast(node.ImplicitInputDefs().size()); + used_implicit_inputs = std::vector(num_implicit_inputs, true); auto& subgraph_inputs = subgraph.GetInputs(); auto& subgraph_outputs = subgraph.GetOutputs(); @@ -73,8 +74,18 @@ Status Subgraph::Setup(const SessionState& session_state, // The position_ids, attention_mask, past_0, ... are created by this operator so the name doesn't matter. 
feed_names.insert(feed_names.end(), subgraph_input_names.begin(), subgraph_input_names.end()); - for (auto& entry : node.ImplicitInputDefs()) { - feed_names.push_back(entry->Name()); + const auto& subgraph_map = subgraph_session_state.GetOrtValueNameIdxMap(); + + const auto& implicit_input_defs = node.ImplicitInputDefs(); + for (size_t i = 0, end = num_implicit_inputs; i < end; ++i) { + const auto* entry = implicit_input_defs[i]; + int idx; + if (subgraph_map.GetIdx(entry->Name(), idx).IsOK()) { + feed_names.push_back(entry->Name()); + } else { + --num_implicit_inputs; + used_implicit_inputs[i] = false; + } } InlinedVector feed_locations; diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.h b/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.h index bde591626bb83..8ec9c9cbdc20f 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.h +++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.h @@ -31,6 +31,7 @@ class Subgraph { const GraphViewer& subgraph; // The subgraph int num_implicit_inputs; + std::vector used_implicit_inputs; int num_subgraph_inputs; // Same as subgraph_input_names.size(), keep it for convenience. int num_subgraph_outputs; // Same as subgraph_output_names.size() diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc index 9037e58aaf31f..6c66bfc2816e4 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc @@ -281,8 +281,11 @@ Status T5DecoderSubgraph::CreateInitialFeeds( } // Pass through implicit inputs. - for (const auto* entry : implicit_inputs) { - decoder_feeds.push_back(*entry); + for (size_t i = 0; i < implicit_inputs.size(); ++i) { + const auto* entry = implicit_inputs[i]; + if (used_implicit_inputs[i]) { + decoder_feeds.push_back(*entry); + } } return Status::OK(); diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_encoder.cc b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_encoder.cc index 51473c0c931b9..d59db4afac2c2 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_encoder.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_encoder.cc @@ -145,8 +145,11 @@ Status T5EncoderSubgraph::CreateInitialFeeds( pinned_allocator, location)); - for (const auto* entry : implicit_inputs) { - feeds.push_back(*entry); + for (size_t i = 0; i < implicit_inputs.size(); ++i) { + const auto* entry = implicit_inputs[i]; + if (used_implicit_inputs[i]) { + feeds.push_back(*entry); + } } return Status::OK(); diff --git a/onnxruntime/test/contrib_ops/beam_search_test.cc b/onnxruntime/test/contrib_ops/beam_search_test.cc index f6fc9ea7662cb..ca600c0700682 100644 --- a/onnxruntime/test/contrib_ops/beam_search_test.cc +++ b/onnxruntime/test/contrib_ops/beam_search_test.cc @@ -7,6 +7,8 @@ #include #include "core/session/onnxruntime_cxx_api.h" #include "test/common/cuda_op_test_utils.h" +#include "test/providers/model_tester.h" +#include "test/util/include/current_test_name.h" #ifdef USE_CUDA #include "core/providers/cuda/cuda_provider_options.h" @@ -394,5 +396,33 @@ TEST(BeamSearchTest, GptBeamSearchFp16_VocabPadded) { } } +TEST(BeamSearchTest, DummyT5) { +#if defined(USE_CUDA) && defined(USE_DML) + SKIP_CUDA_TEST_WITH_DML; +#endif + ModelTester tester(CurrentTestName(), ORT_TSTR("testdata/dummy_t5.onnx")); + tester.ConfigEp(DefaultCpuExecutionProvider()); + tester.AddInput("encoder_input_ids", {1, 5}, {14, 6, 13, 9, 7}); + 
tester.AddOutput("sequences", {1, 3, 10}, {2, 16, 6, 14, 1, 15, 6, 14, 1, 15, 2, 3, 4, 15, 6, 14, 1, 15, 6, 14, 2, 16, 6, 14, 1, 15, 6, 14, 1, 14}); +#ifdef USE_CUDA + tester.ConfigEp(DefaultCudaExecutionProvider()); +#endif + tester.RunWithConfig(); +} + +TEST(BeamSearchTest, DummyT5WithOuterScopeInitializers) { +#if defined(USE_CUDA) && defined(USE_DML) + SKIP_CUDA_TEST_WITH_DML; +#endif + ModelTester tester(CurrentTestName(), ORT_TSTR("testdata/dummy_t5_with_outer_scope_initializers.onnx")); + tester.ConfigEp(DefaultCpuExecutionProvider()); + tester.AddInput("encoder_input_ids", {1, 5}, {14, 6, 13, 9, 7}); + tester.AddOutput("sequences", {1, 3, 10}, {2, 16, 6, 14, 1, 15, 6, 14, 1, 15, 2, 3, 4, 15, 6, 14, 1, 15, 6, 14, 2, 16, 6, 14, 1, 15, 6, 14, 1, 14}); +#ifdef USE_CUDA + tester.ConfigEp(DefaultCudaExecutionProvider()); +#endif + tester.RunWithConfig(); +} + } // namespace test } // namespace onnxruntime
diff --git a/onnxruntime/test/testdata/dummy_t5.onnx b/onnxruntime/test/testdata/dummy_t5.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3a3bbf47675231bca6d80570ec6cedb01ed4440a GIT binary patch literal 6815 [base85-encoded binary data omitted] literal 0 HcmV?d00001
From e12421be3054bdbeefa974a51bd2adf836016871 Mon Sep 17 00:00:00 2001 From: wejoncy Date: Tue, 10 Dec 2024 09:35:05 +0800 Subject: [PATCH 04/13] [CoreML] more performance flags (#22975) ### Description refactor Unsqueeze's implementation and add more flags to boost performance.
add profile flag ### Motivation and Context --------- Co-authored-by: jicwen Co-authored-by: wejoncy Co-authored-by: Scott McKay --- .../coreml/coreml_provider_factory.h | 12 +++ .../builders/impl/batch_norm_op_builder.cc | 2 +- .../builders/impl/reduction_op_builder.cc | 5 +- .../builders/impl/squeeze_op_builder.cc | 46 +++++++--- .../coreml/builders/model_builder.cc | 6 +- .../providers/coreml/builders/model_builder.h | 5 +- .../core/providers/coreml/coreml_options.cc | 15 ++- .../core/providers/coreml/coreml_options.h | 6 ++ .../core/providers/coreml/model/model.h | 3 +- .../core/providers/coreml/model/model.mm | 92 +++++++++++++++++-- .../core/providers/coreml/model/model_stub.cc | 3 +- .../test/perftest/command_args_parser.cc | 3 + onnxruntime/test/perftest/ort_test_session.cc | 11 ++- 13 files changed, 173 insertions(+), 36 deletions(-) diff --git a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h index 3963b80de58a4..d035fd34bd072 100644 --- a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h +++ b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h @@ -47,8 +47,20 @@ enum COREMLFlags { // and SessionOptionsAppendExecutionProvider (C API). For the old API, use COREMLFlags instead. static const char* const kCoremlProviderOption_MLComputeUnits = "MLComputeUnits"; static const char* const kCoremlProviderOption_ModelFormat = "ModelFormat"; +// same as COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES static const char* const kCoremlProviderOption_RequireStaticInputShapes = "RequireStaticInputShapes"; static const char* const kCoremlProviderOption_EnableOnSubgraphs = "EnableOnSubgraphs"; +// provided by https://developer.apple.com/documentation/coreml/mloptimizationhints-swift.struct/specializationstrategy-swift.property +// Core ML segments the model’s compute graph and specializes each segment for the target compute device. +// This process can affect the model loading time and the prediction latency. +// Use this option to tailor the specialization strategy for your model. +static const char* const kCoremlProviderOption_SpecializationStrategy = "SpecializationStrategy"; +// Profile the Core ML MLComputePlan. +// This logs the hardware each operator is dispatched to and the estimated execution time. +// Intended for developer usage but provide useful diagnostic information if performance is not as expected. 
+static const char* const kCoremlProviderOption_ProfileComputePlan = "ProfileComputePlan"; +// please refer to https://developer.apple.com/documentation/coreml/mlmodelconfiguration/allowlowprecisionaccumulationongpu +static const char* const kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU = "AllowLowPrecisionAccumulationOnGPU"; #ifdef __cplusplus extern "C" { diff --git a/onnxruntime/core/providers/coreml/builders/impl/batch_norm_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/batch_norm_op_builder.cc index cc68fa6ec399a..442194cb31cbc 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/batch_norm_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/batch_norm_op_builder.cc @@ -151,7 +151,7 @@ bool BatchNormalizationOpBuilder::IsOpSupportedImpl(const Node& node, const OpBu return false; } -#if defined(TARGET_OS_IOS) && defined(TARGET_CPU_X86_64) +#if defined(TARGET_OS_IOS) && defined(TARGET_CPU_X86_64) && TARGET_OS_IOS && TARGET_CPU_X86_64 // To Pass IOS pipeline https://dev.azure.com/onnxruntime/onnxruntime/_build?definitionId=134&_a=summary auto input_dtype = input_defs[0]->TypeAsProto()->tensor_type().elem_type(); if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 && input_params.coreml_version < 7) { diff --git a/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc index f161b309a2425..d533b867bd454 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc @@ -133,9 +133,8 @@ bool ReductionOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInpu return false; } -#if defined(TARGET_OS_IOS) && defined(TARGET_CPU_X86_64) - // to pass https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1563483&view=logs&j=f7cc61a9-cc70-56e7-b06c-4668ca17e426 - // ReductionOpTest.ReduceSum_half_bert +#if defined(TARGET_OS_IOS) && defined(TARGET_CPU_X86_64) && TARGET_OS_IOS && TARGET_CPU_X86_64 + // skip ReductionOpTest.ReduceSum_half_bert because reduce_sum will output all zeros int32_t input_type; GetType(*input_defs[0], input_type, logger); if (node.OpType() == "ReduceSum" && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) { diff --git a/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc index c8df7c1a43f65..a1b3a18265c70 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc @@ -13,6 +13,10 @@ #include "core/optimizer/initializer.h" #include "core/providers/cpu/tensor/unsqueeze.h" +#ifdef __APPLE__ +#include +#endif + namespace onnxruntime { namespace coreml { @@ -54,32 +58,50 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const } } +#if defined(COREML_ENABLE_MLPROGRAM) +void HandleX86ArchUnsqueezeScalarInput(ModelBuilder& model_builder, + const Node& node, const logging::Logger& logger) { + const auto& input_defs(node.InputDefs()); + TensorShapeVector axes; + GetAxes(model_builder, node, axes); + + std::vector input_shape; + GetShape(*input_defs[0], input_shape, logger); + auto op = model_builder.CreateOperation(node, "reshape"); + AddOperationInput(*op, "x", input_defs[0]->Name()); + TensorShapeVector output_shape = UnsqueezeBase::ComputeOutputShape(TensorShape(input_shape), axes); + 
AddOperationInput(*op, "shape", model_builder.AddConstant(op->type(), "shape", AsSpan(output_shape))); + AddOperationOutput(*op, *node.OutputDefs()[0]); + model_builder.AddOperation(std::move(op)); +} +#endif + Status SqueezeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, [[maybe_unused]] const logging::Logger& logger) const { std::unique_ptr layer = model_builder.CreateNNLayer(node); - const auto& input_defs(node.InputDefs()); auto* coreml_squeeze = layer->mutable_squeeze(); TensorShapeVector axes; GetAxes(model_builder, node, axes); - std::vector input_shape; - GetShape(*input_defs[0], input_shape, logger); #if defined(COREML_ENABLE_MLPROGRAM) + const auto& input_defs(node.InputDefs()); if (model_builder.CreateMLProgram()) { using namespace CoreML::Specification::MILSpec; - std::string_view coreml_op_type = node.OpType() == "Squeeze" ? "squeeze" : "reshape"; +#if defined(TARGET_CPU_X86_64) && TARGET_CPU_X86_64 + // expand_dims has limited requirements for static shape, however, X86_64 has a bug that it can't handle scalar input + if (node.OpType() == "Unsqueeze" && input_defs[0]->Shape()->dim_size() < 2) { + HandleX86ArchUnsqueezeScalarInput(model_builder, node, logger); + return Status::OK(); + } +#endif + std::string_view coreml_op_type = node.OpType() == "Squeeze" ? "squeeze" : "expand_dims"; std::unique_ptr op = model_builder.CreateOperation(node, coreml_op_type); AddOperationInput(*op, "x", input_defs[0]->Name()); - if (coreml_op_type == "squeeze") { - if (!axes.empty()) { - // coreml squeeze op does support negative axes - AddOperationInput(*op, "axes", model_builder.AddConstant(op->type(), "axes", AsSpan(axes))); - } - } else { - TensorShapeVector output_shape = UnsqueezeBase::ComputeOutputShape(TensorShape(input_shape), axes); - AddOperationInput(*op, "shape", model_builder.AddConstant(op->type(), "shape", AsSpan(output_shape))); + if (!axes.empty()) { + // coreml supports negative axes + AddOperationInput(*op, "axes", model_builder.AddConstant(op->type(), "axes", AsSpan(axes))); } AddOperationOutput(*op, *node.OutputDefs()[0]); model_builder.AddOperation(std::move(op)); diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc index 2a02c1f4124f6..6486942199df7 100644 --- a/onnxruntime/core/providers/coreml/builders/model_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc @@ -408,7 +408,7 @@ ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logge : graph_viewer_(graph_viewer), logger_(logger), coreml_version_(coreml_version), - coreml_compute_unit_(coreml_options.ComputeUnits()), + coreml_options_(coreml_options), create_ml_program_(coreml_options.CreateMLProgram()), model_output_path_(GetModelOutputPath(create_ml_program_)), onnx_input_names_(std::move(onnx_input_names)), @@ -989,7 +989,7 @@ Status ModelBuilder::LoadModel(std::unique_ptr& model) { get_sanitized_io_info(std::move(input_output_info_)), std::move(scalar_outputs_), std::move(int64_outputs_), - logger_, coreml_compute_unit_); + logger_, coreml_options_); } else #endif { @@ -999,7 +999,7 @@ Status ModelBuilder::LoadModel(std::unique_ptr& model) { std::move(input_output_info_), std::move(scalar_outputs_), std::move(int64_outputs_), - logger_, coreml_compute_unit_); + logger_, coreml_options_); } return model->LoadModel(); // load using CoreML API, including compilation diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.h 
b/onnxruntime/core/providers/coreml/builders/model_builder.h index af47869f7e1c3..e19597cf0dc2e 100644 --- a/onnxruntime/core/providers/coreml/builders/model_builder.h +++ b/onnxruntime/core/providers/coreml/builders/model_builder.h @@ -7,6 +7,7 @@ #include "core/graph/graph_viewer.h" #include "core/providers/coreml/builders/coreml_spec.h" #include "core/providers/coreml/model/model.h" +#include "core/providers/coreml/coreml_options.h" #if defined(COREML_ENABLE_MLPROGRAM) // coremltools classes @@ -22,8 +23,6 @@ class StorageWriter; #endif namespace onnxruntime { -class CoreMLOptions; - namespace coreml { class IOpBuilder; @@ -218,7 +217,7 @@ class ModelBuilder { const GraphViewer& graph_viewer_; const logging::Logger& logger_; const int32_t coreml_version_; - const uint32_t coreml_compute_unit_; + CoreMLOptions coreml_options_; const bool create_ml_program_; // ML Program (CoreML5, iOS 15+, macOS 12+) or NeuralNetwork (old) const std::string model_output_path_; // create_ml_program_ ? dir for mlpackage : filename for mlmodel diff --git a/onnxruntime/core/providers/coreml/coreml_options.cc b/onnxruntime/core/providers/coreml/coreml_options.cc index df78f74383871..4ec780208e528 100644 --- a/onnxruntime/core/providers/coreml/coreml_options.cc +++ b/onnxruntime/core/providers/coreml/coreml_options.cc @@ -63,11 +63,14 @@ void CoreMLOptions::ValidateAndParseProviderOption(const ProviderOptions& option {"MLProgram", COREML_FLAG_CREATE_MLPROGRAM}, {"NeuralNetwork", COREML_FLAG_USE_NONE}, }; - std::unordered_set valid_options = { + const std::unordered_set valid_options = { kCoremlProviderOption_MLComputeUnits, kCoremlProviderOption_ModelFormat, kCoremlProviderOption_RequireStaticInputShapes, kCoremlProviderOption_EnableOnSubgraphs, + kCoremlProviderOption_SpecializationStrategy, + kCoremlProviderOption_ProfileComputePlan, + kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU, }; // Validate the options for (const auto& option : options) { @@ -90,6 +93,16 @@ void CoreMLOptions::ValidateAndParseProviderOption(const ProviderOptions& option require_static_shape_ = option.second == "1"; } else if (kCoremlProviderOption_EnableOnSubgraphs == option.first) { enable_on_subgraph_ = option.second == "1"; + } else if (kCoremlProviderOption_SpecializationStrategy == option.first) { + if (option.second != "Default" && option.second != "FastPrediction") { + ORT_THROW("Invalid value for option ", option.first, ": ", option.second, + ". 
Valid values are Default and FastPrediction."); + } + strategy_ = option.second; + } else if (kCoremlProviderOption_ProfileComputePlan == option.first) { + profile_compute_plan_ = option.second == "1"; + } else if (kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU == option.first) { + allow_low_precision_accumulation_on_gpu_ = option.second == "1"; } } } diff --git a/onnxruntime/core/providers/coreml/coreml_options.h b/onnxruntime/core/providers/coreml/coreml_options.h index 8bb748fcd69c9..fd05c96927bd1 100644 --- a/onnxruntime/core/providers/coreml/coreml_options.h +++ b/onnxruntime/core/providers/coreml/coreml_options.h @@ -14,6 +14,9 @@ class CoreMLOptions { bool create_mlprogram_{false}; bool enable_on_subgraph_{false}; uint32_t compute_units_{0}; + std::string strategy_; + bool profile_compute_plan_{false}; + bool allow_low_precision_accumulation_on_gpu_{false}; public: explicit CoreMLOptions(uint32_t coreml_flags); @@ -25,6 +28,9 @@ class CoreMLOptions { bool CreateMLProgram() const { return create_mlprogram_; } bool EnableOnSubgraph() const { return enable_on_subgraph_; } uint32_t ComputeUnits(uint32_t specific_flag = 0xffffffff) const { return compute_units_ & specific_flag; } + bool AllowLowPrecisionAccumulationOnGPU() const { return allow_low_precision_accumulation_on_gpu_; } + bool UseStrategy(std::string_view strategy) const { return strategy_ == strategy; } + bool ProfileComputePlan() const { return profile_compute_plan_ && create_mlprogram_; } private: void ValidateAndParseProviderOption(const ProviderOptions& options); diff --git a/onnxruntime/core/providers/coreml/model/model.h b/onnxruntime/core/providers/coreml/model/model.h index 68ecbe5fb80c4..84b7d741b4714 100644 --- a/onnxruntime/core/providers/coreml/model/model.h +++ b/onnxruntime/core/providers/coreml/model/model.h @@ -18,6 +18,7 @@ #endif namespace onnxruntime { +class CoreMLOptions; namespace coreml { class Execution; @@ -53,7 +54,7 @@ class Model { std::unordered_map&& input_output_info, std::unordered_set&& scalar_outputs, std::unordered_set&& int64_outputs, - const logging::Logger& logger, uint32_t coreml_compute_unit); + const logging::Logger& logger, const CoreMLOptions& coreml_options); ~Model(); ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Model); diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm index c8edb64ff55d7..755dbfbd6e68c 100644 --- a/onnxruntime/core/providers/coreml/model/model.mm +++ b/onnxruntime/core/providers/coreml/model/model.mm @@ -25,6 +25,7 @@ #include "core/providers/coreml/model/host_utils.h" #include "core/providers/coreml/model/objc_str_utils.h" #include "core/providers/coreml/shape_utils.h" +#include "core/providers/coreml/coreml_options.h" // force the linker to create a dependency on the CoreML framework so that in MAUI usage we don't need // to manually do this @@ -300,6 +301,53 @@ Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array, return Status::OK(); } +// since __clang_major__ >= 15, MLComputePlan is introduced in +// We are actually ensure the MacOS/IOS version and Xcode version is greater than `macOS 14.4, iOS 17.4`. +// The macro API_AVAILABLE should also be fine. +// Otherwise, the compiler will complain `MLComputePlan` is not defined. 
+// we define __clang_analyzer__ here is for bypass static analysis +void ProfileComputePlan(NSURL* compileUrl, MLModelConfiguration* config) { +#if defined(__APPLE__) && defined(__clang__) && __clang_major__ >= 15 && !defined(__clang_analyzer__) + if (@available(macOS 14.4, iOS 17.4, *)) { + [MLComputePlan loadContentsOfURL:compileUrl + configuration:config + completionHandler:^(MLComputePlan* _Nullable computePlan, NSError* _Nullable error) { + if (!computePlan) { + NSLog(@"Error loading compute plan: %@", error); + // Handle error. + return; + } + MLModelStructureProgram* program = computePlan.modelStructure.program; + if (!program) { + NSLog(@"Error loading program from compute plan., this is not a mlprogram model"); + return; + } + + MLModelStructureProgramFunction* mainFunction = program.functions[@"main"]; + if (!mainFunction) { + NSLog(@"Error loading main function from program"); + return; + } + + NSArray* operations = mainFunction.block.operations; + NSLog(@"Number of operations, 'const' node is included. : %lu", operations.count); + for (MLModelStructureProgramOperation* operation in operations) { + // Get the compute device usage for the operation. + MLComputePlanDeviceUsage* computeDeviceUsage = [computePlan computeDeviceUsageForMLProgramOperation:operation]; + id preferredDevice = computeDeviceUsage.preferredComputeDevice; + // Get the estimated cost of executing the operation. + MLComputePlanCost* estimatedCost = [computePlan estimatedCostOfMLProgramOperation:operation]; + if (![operation.operatorName isEqualToString:@"const"]) { + NSLog(@"Operation: %@, Device Usage: %@, Estimated Cost: %f", operation.operatorName, preferredDevice, estimatedCost.weight); + } + } + }]; + } else { + NSLog(@"iOS 17.4+/macOS 14.4+ or later is required to use the compute plan API"); + } +#endif +} + // Internal Execution class // This class is part of the model class and handles the calls into CoreML. Specifically, it performs // 1. Compile the model by given path for execution @@ -307,7 +355,7 @@ Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array, // 3. 
The compiled model will be removed in dealloc or removed using cleanup function class Execution { public: - Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags); + Execution(const std::string& path, const logging::Logger& logger, const CoreMLOptions& coreml_options); ~Execution(); Status LoadModel(); @@ -320,13 +368,13 @@ Status Predict(const std::unordered_map& inputs, NSString* coreml_model_path_{nil}; NSString* compiled_model_path_{nil}; const logging::Logger& logger_; - uint32_t coreml_compute_unit_{0}; + CoreMLOptions coreml_options_; MLModel* model_{nil}; }; -Execution::Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_compute_unit) +Execution::Execution(const std::string& path, const logging::Logger& logger, const CoreMLOptions& coreml_options) : logger_(logger), - coreml_compute_unit_(coreml_compute_unit) { + coreml_options_(coreml_options) { @autoreleasepool { coreml_model_path_ = util::Utf8StringToNSString(path.c_str()); } @@ -395,17 +443,41 @@ Status Predict(const std::unordered_map& inputs, compiled_model_path_ = [compileUrl path]; MLModelConfiguration* config = [[MLModelConfiguration alloc] init]; - - if (coreml_compute_unit_ & COREML_FLAG_USE_CPU_ONLY) { + uint32_t coreml_compute_unit = coreml_options_.ComputeUnits(); + if (coreml_compute_unit & COREML_FLAG_USE_CPU_ONLY) { config.computeUnits = MLComputeUnitsCPUOnly; - } else if (coreml_compute_unit_ & COREML_FLAG_USE_CPU_AND_GPU) { + } else if (coreml_compute_unit & COREML_FLAG_USE_CPU_AND_GPU) { config.computeUnits = MLComputeUnitsCPUAndGPU; - } else if (coreml_compute_unit_ & COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE) { + } else if (coreml_compute_unit & COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE) { config.computeUnits = MLComputeUnitsCPUAndNeuralEngine; // Apple Neural Engine } else { config.computeUnits = MLComputeUnitsAll; } + if (coreml_options_.AllowLowPrecisionAccumulationOnGPU()) { + config.allowLowPrecisionAccumulationOnGPU = YES; + } + +// Set the specialization strategy to FastPrediction for macOS 10.15+ +// since __clang_major__ >= 15, optimizationHints is introduced in +// Same as above comments for why we are checking __clang_major__. 
+// we define __clang_analyzer__ here is for bypass static analysis +#if defined(__APPLE__) && defined(__clang__) && __clang_major__ >= 15 && !defined(__clang_analyzer__) + if (HAS_COREML8_OR_LATER) { + MLOptimizationHints* optimizationHints = [[MLOptimizationHints alloc] init]; + if (coreml_options_.UseStrategy("FastPrediction")) { + optimizationHints.specializationStrategy = MLSpecializationStrategyFastPrediction; + config.optimizationHints = optimizationHints; + } else if (coreml_options_.UseStrategy("Default")) { + optimizationHints.specializationStrategy = MLSpecializationStrategyDefault; + config.optimizationHints = optimizationHints; + } + } +#endif + if (coreml_options_.ProfileComputePlan()) { + ProfileComputePlan(compileUrl, config); + } + model_ = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error]; if (error != nil || model_ == nil) { @@ -524,8 +596,8 @@ Status Predict(const std::unordered_map& inputs, std::unordered_set&& scalar_outputs, std::unordered_set&& int64_outputs, const logging::Logger& logger, - uint32_t coreml_flags) - : execution_(std::make_unique(path, logger, coreml_flags)), + const CoreMLOptions& coreml_options) + : execution_(std::make_unique(path, logger, coreml_options)), model_input_names_(std::move(model_input_names)), model_output_names_(std::move(model_output_names)), input_output_info_(std::move(input_output_info)), diff --git a/onnxruntime/core/providers/coreml/model/model_stub.cc b/onnxruntime/core/providers/coreml/model/model_stub.cc index c6f2e7401ea1e..e9036e2fc7e1a 100644 --- a/onnxruntime/core/providers/coreml/model/model_stub.cc +++ b/onnxruntime/core/providers/coreml/model/model_stub.cc @@ -4,6 +4,7 @@ #include "core/providers/coreml/model/model.h" namespace onnxruntime { +class CoreMLOptions; namespace coreml { class Execution {}; @@ -15,7 +16,7 @@ Model::Model(const std::string& /*path*/, std::unordered_set&& scalar_outputs, std::unordered_set&& int64_outputs, const logging::Logger& /*logger*/, - uint32_t /*coreml_flags*/) + const CoreMLOptions& /*coreml_flags*/) : execution_(std::make_unique()), model_input_names_(std::move(model_input_names)), model_output_names_(std::move(model_output_names)), diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 3f2c2cb7f761c..23c3812ebd025 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -135,6 +135,9 @@ namespace perftest { "\t [CoreML only] [MLComputeUnits]:[CPUAndNeuralEngine CPUAndGPU ALL CPUOnly] Specify to limit the backend device used to run the model.\n" "\t [CoreML only] [AllowStaticInputShapes]:[0 1].\n" "\t [CoreML only] [EnableOnSubgraphs]:[0 1].\n" + "\t [CoreML only] [SpecializationStrategy]:[Default FastPrediction].\n" + "\t [CoreML only] [ProfileComputePlan]:[0 1].\n" + "\t [CoreML only] [AllowLowPrecisionAccumulationOnGPU]:[0 1].\n" "\t [Example] [For CoreML EP] -e coreml -i \"ModelFormat|MLProgram MLComputeUnits|CPUAndGPU\"\n" "\n" "\t [SNPE only] [runtime]: SNPE runtime, options: 'CPU', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 5db1894a5074b..a96028ed3903e 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -346,7 +346,10 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. 
\n)"); static const std::unordered_set available_keys = {kCoremlProviderOption_MLComputeUnits, kCoremlProviderOption_ModelFormat, kCoremlProviderOption_RequireStaticInputShapes, - kCoremlProviderOption_EnableOnSubgraphs}; + kCoremlProviderOption_EnableOnSubgraphs, + kCoremlProviderOption_SpecializationStrategy, + kCoremlProviderOption_ProfileComputePlan, + kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU}; ParseSessionConfigs(ov_string, provider_options, available_keys); std::unordered_map available_options = { @@ -364,6 +367,12 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); (provider_option.second == "1" || provider_option.second == "0")) { } else if (provider_option.first == kCoremlProviderOption_EnableOnSubgraphs && (provider_option.second == "0" || provider_option.second == "1")) { + } else if (provider_option.first == kCoremlProviderOption_SpecializationStrategy && + (provider_option.second == "Default" || provider_option.second == "FastPrediction")) { + } else if (provider_option.first == kCoremlProviderOption_ProfileComputePlan && + (provider_option.second == "0" || provider_option.second == "1")) { + } else if (provider_option.first == kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU && + (provider_option.second == "0" || provider_option.second == "1")) { } else { ORT_THROW("Invalid value for option ", provider_option.first, ": ", provider_option.second); } From 708ee8556e73c76993aaddf5558da6118e8aa510 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 10 Dec 2024 11:54:14 +1000 Subject: [PATCH 05/13] Reduce default logger usage (#23030) ### Description We have use cases where multiple sessions are created concurrently. Minimizing the usage of the default logger is important for these scenarios. Wire through the session logger to as many places as possible. The EP logger can also be used once the session is created (can't be used during EP construction/kernel registration but can be used in GetCapability and Compile). ### Motivation and Context Improve logging when there are concurrent sessions. 
--- .../core/framework/kernel_registry.h | 12 ++- .../core/optimizer/graph_transformer_utils.h | 2 + .../core/framework/allocation_planner.cc | 79 ++++++++++--------- .../core/framework/fallback_cpu_capability.cc | 11 +-- .../core/framework/fallback_cpu_capability.h | 6 +- .../core/framework/graph_partitioner.cc | 51 +++++++----- onnxruntime/core/framework/kernel_lookup.h | 9 ++- onnxruntime/core/framework/kernel_registry.cc | 12 ++- .../core/framework/kernel_registry_manager.cc | 14 ++-- .../core/framework/kernel_registry_manager.h | 5 +- onnxruntime/core/framework/session_state.cc | 4 +- .../core/optimizer/constant_folding.cc | 7 +- .../core/optimizer/graph_transformer_utils.cc | 8 +- .../core/optimizer/insert_cast_transformer.cc | 16 ++-- .../core/optimizer/nhwc_transformer.cc | 16 ++-- onnxruntime/core/optimizer/nhwc_transformer.h | 3 +- .../optimizer/optimizer_execution_frame.cc | 19 +++-- .../optimizer/optimizer_execution_frame.h | 7 +- .../qdq_transformer/avx2_weight_s8_to_u8.cc | 8 +- .../selectors_actions/shared/utils.cc | 9 ++- .../selectors_actions/shared/utils.h | 8 +- .../core/optimizer/transformer_memcpy.cc | 47 +++++++---- .../providers/cann/cann_execution_provider.cc | 6 +- .../providers/cuda/cuda_execution_provider.cc | 2 +- .../src/DmlGraphFusionTransformer.cpp | 3 +- .../src/DmlRuntimeGraphFusionTransformer.cpp | 3 +- .../src/ExecutionProvider.cpp | 7 +- .../src/ExecutionProvider.h | 3 +- .../providers/js/js_execution_provider.cc | 2 +- .../nnapi_builtin/builders/model_builder.cc | 14 +++- .../nnapi_builtin/builders/model_builder.h | 10 ++- .../nnapi_builtin/nnapi_execution_provider.cc | 17 ++-- .../qdq_transformations/qdq_stripping.cc | 2 +- .../core/providers/qnn/builder/qnn_model.cc | 2 +- .../providers/qnn/qnn_execution_provider.cc | 2 +- .../providers/rocm/rocm_execution_provider.cc | 2 +- .../providers/shared_library/provider_api.h | 7 +- .../provider_bridge_provider.cc | 5 +- .../shared_library/provider_interfaces.h | 5 +- .../core/providers/vsinpu/vsinpu_ep_graph.cc | 5 +- .../core/providers/vsinpu/vsinpu_ep_graph.h | 3 +- .../vsinpu/vsinpu_execution_provider.cc | 26 +++--- .../webgpu/webgpu_execution_provider.cc | 3 +- .../xnnpack/xnnpack_execution_provider.cc | 3 +- onnxruntime/core/session/inference_session.cc | 13 +-- onnxruntime/core/session/inference_session.h | 8 +- .../core/session/inference_session_utils.cc | 5 +- .../core/session/inference_session_utils.h | 7 +- .../core/session/provider_bridge_ort.cc | 9 ++- .../core/session/standalone_op_invoker.cc | 7 +- .../test/framework/allocation_planner_test.cc | 8 +- .../test/optimizer/graph_transform_test.cc | 12 ++- .../optimizer/graph_transform_utils_test.cc | 11 ++- onnxruntime/test/optimizer/optimizer_test.cc | 22 +++--- .../test/optimizer/qdq_transformer_test.cc | 7 +- onnxruntime/test/providers/base_tester.cc | 7 +- .../providers/kernel_compute_test_utils.cc | 8 +- .../test/providers/partitioning_utils_test.cc | 4 +- .../core/session/training_session.cc | 3 +- .../core/session/training_session.h | 3 +- .../test/gradient/gradient_op_test_utils.cc | 3 +- .../optimizer/graph_transformer_utils_test.cc | 10 ++- 62 files changed, 389 insertions(+), 243 deletions(-) diff --git a/include/onnxruntime/core/framework/kernel_registry.h b/include/onnxruntime/core/framework/kernel_registry.h index 7b3d04ee66d9e..aaf533135429c 100644 --- a/include/onnxruntime/core/framework/kernel_registry.h +++ b/include/onnxruntime/core/framework/kernel_registry.h @@ -8,6 +8,9 @@ #include "core/framework/op_kernel.h" namespace 
onnxruntime { +namespace logging { +class Logger; +} using KernelCreateMap = std::multimap; using KernelDefHashes = std::vector>; @@ -33,6 +36,7 @@ class KernelRegistry { // Kernel matching uses the types from the node and the kernel_type_str_resolver. Status TryFindKernel(const Node& node, ProviderType exec_provider, const IKernelTypeStrResolver& kernel_type_str_resolver, + const logging::Logger& logger, const KernelCreateInfo** out) const; // map of type constraint name to required type @@ -42,6 +46,7 @@ class KernelRegistry { // Kernel matching uses the explicit type constraint name to required type map in type_constraints. Status TryFindKernel(const Node& node, ProviderType exec_provider, const TypeConstraintMap& type_constraints, + const logging::Logger& logger, const KernelCreateInfo** out) const; /** @@ -61,13 +66,15 @@ class KernelRegistry { std::string_view domain, int version, const KernelRegistry::TypeConstraintMap& type_constraints, + const logging::Logger& logger, const KernelCreateInfo** out) const; static bool HasImplementationOf(const KernelRegistry& r, const Node& node, ProviderType exec_provider, - const IKernelTypeStrResolver& kernel_type_str_resolver) { + const IKernelTypeStrResolver& kernel_type_str_resolver, + const logging::Logger& logger) { const KernelCreateInfo* info; - Status st = r.TryFindKernel(node, exec_provider, kernel_type_str_resolver, &info); + Status st = r.TryFindKernel(node, exec_provider, kernel_type_str_resolver, logger, &info); return st.IsOK(); } @@ -83,6 +90,7 @@ class KernelRegistry { Status TryFindKernelImpl(const Node& node, ProviderType exec_provider, const IKernelTypeStrResolver* kernel_type_str_resolver, const TypeConstraintMap* type_constraints, + const logging::Logger& logger, const KernelCreateInfo** out) const; // Check whether the types of inputs/outputs of the given node match the extra diff --git a/include/onnxruntime/core/optimizer/graph_transformer_utils.h b/include/onnxruntime/core/optimizer/graph_transformer_utils.h index 6cff153c336f0..31b0f22340510 100644 --- a/include/onnxruntime/core/optimizer/graph_transformer_utils.h +++ b/include/onnxruntime/core/optimizer/graph_transformer_utils.h @@ -53,6 +53,7 @@ InlinedVector> GenerateTransformers( TransformerLevel level, const SessionOptions& session_options, const IExecutionProvider& execution_provider /*required by constant folding*/, + const logging::Logger& logger, const InlinedHashSet& rules_and_transformers_to_disable = {}, concurrency::ThreadPool* intra_op_thread_pool = nullptr, std::unordered_map>* p_buffered_tensors = nullptr); @@ -84,6 +85,7 @@ InlinedVector> GenerateTransformersForMinimalB const SessionOptions& session_options, const SatApplyContextVariant& apply_context, const IExecutionProvider& cpu_execution_provider, + const logging::Logger& logger, const InlinedHashSet& rules_and_transformers_to_disable = {}, concurrency::ThreadPool* intra_op_thread_pool = nullptr, std::unordered_map>* p_buffered_tensors = nullptr); diff --git a/onnxruntime/core/framework/allocation_planner.cc b/onnxruntime/core/framework/allocation_planner.cc index 5dca4cf6c165b..ecd3960107926 100644 --- a/onnxruntime/core/framework/allocation_planner.cc +++ b/onnxruntime/core/framework/allocation_planner.cc @@ -138,7 +138,8 @@ class PlannerImpl { const SubgraphsKernelCreateInfoMaps& subgraphs_kernel_create_info_maps, const InlinedHashMap& outer_scope_node_arg_to_location_map, const OrtValueNameIdxMap& ort_value_name_idx_map, - const ISequentialPlannerContext& context, SequentialExecutionPlan& plan) + 
const ISequentialPlannerContext& context, SequentialExecutionPlan& plan, + const logging::Logger& logger) : context_(&context), plan_(plan), parent_node_(parent_node), @@ -148,14 +149,15 @@ class PlannerImpl { kernel_create_info_map_(kernel_create_info_map), subgraphs_kernel_create_info_maps_(subgraphs_kernel_create_info_maps), outer_scope_node_arg_to_location_map_(outer_scope_node_arg_to_location_map), - ort_value_name_idx_map_(ort_value_name_idx_map) {} + ort_value_name_idx_map_(ort_value_name_idx_map), + logger_(logger) { + } Status CreatePlan( #ifdef ORT_ENABLE_STREAM const IStreamCommandHandleRegistry& stream_handle_registry, #endif - const PathString& partition_config_file, - const logging::Logger& logger); + const PathString& partition_config_file); private: gsl::not_null context_; @@ -183,6 +185,12 @@ class PlannerImpl { InlinedHashMap> dependence_graph_; InlinedHashMap value_node_map_; + // logger_ is not currently used in a minimal build +#if defined(ORT_MINIMAL_BUILD) && !defined(ORT_EXTENDED_MINIMAL_BUILD) + [[maybe_unused]] +#endif + const logging::Logger& logger_; + // OrtValueInfo: Auxiliary information about an OrtValue used only during plan-generation: struct OrtValueInfo { const onnxruntime::NodeArg* p_def_site; // the (unique) NodeArg corresponding to the MLValue @@ -213,6 +221,7 @@ class PlannerImpl { FreeBufferInfo(OrtValueIndex ort_value, size_t dealloc_point) : ml_value(ort_value), deallocate_point(dealloc_point) {} }; + // freelist_ : a list of ml-values whose buffers are free to be reused, sorted by when // they became free (more recently freed earlier in the list). std::list freelist_; @@ -225,7 +234,8 @@ class PlannerImpl { } int& UseCount(OrtValueIndex n) { - ORT_ENFORCE(n >= 0 && static_cast(n) < ort_value_info_.size(), "invalid value index: ", n, " against size ", ort_value_info_.size()); + ORT_ENFORCE(n >= 0 && static_cast(n) < ort_value_info_.size(), + "invalid value index: ", n, " against size ", ort_value_info_.size()); return ort_value_info_[n].usecount; } int& UseCount(const OrtValueName& name) { return UseCount(Index(name)); } @@ -335,9 +345,9 @@ class PlannerImpl { // we cannot. const Node* producer_node = graph.GetProducerNode(p_input_arg->Name()); if (producer_node && HasExternalOutputs(*producer_node)) { - LOGS_DEFAULT(VERBOSE) << "Be noted Node " << node.Name() << " is reusing input buffer of node " - << producer_node->Name() << " which has external outputs. " - << "Be cautious the reuse MUST be a read-only usage."; + LOGS(logger_, VERBOSE) << "Be noted Node " << node.Name() << " is reusing input buffer of node " + << producer_node->Name() << " which has external outputs. " + << "Be cautious the reuse MUST be a read-only usage."; } #endif *reusable_input = Index(p_input_arg->Name()); @@ -361,9 +371,9 @@ class PlannerImpl { // we cannot. const Node* producer_node = graph.GetProducerNode(p_input_arg->Name()); if (producer_node && HasExternalOutputs(*producer_node)) { - LOGS_DEFAULT(VERBOSE) << "Be noted Node " << node.Name() << " is reusing input buffer of node " - << producer_node->Name() << " which has external outputs. " - << "Be cautious the reuse MUST be a read-only usage."; + LOGS(logger_, VERBOSE) << "Be noted Node " << node.Name() << " is reusing input buffer of node " + << producer_node->Name() << " which has external outputs. 
" + << "Be cautious the reuse MUST be a read-only usage."; } #endif *reusable_input = Index(p_input_arg->Name()); @@ -397,8 +407,8 @@ class PlannerImpl { } } else { #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) - LOGS_DEFAULT(VERBOSE) << "Node " << node.Name() << " cannot reuse input buffer for node " - << producer_node->Name() << " as it has external outputs"; + LOGS(logger_, VERBOSE) << "Node " << node.Name() << " cannot reuse input buffer for node " + << producer_node->Name() << " as it has external outputs"; #endif } } @@ -448,8 +458,8 @@ class PlannerImpl { return true; } else { #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) - LOGS_DEFAULT(VERBOSE) << "Node " << node.Name() << " cannot reuse strided output buffer for node " - << producer_node->Name() << " as it has external outputs."; + LOGS(logger_, VERBOSE) << "Node " << node.Name() << " cannot reuse strided output buffer for node " + << producer_node->Name() << " as it has external outputs."; #endif } } @@ -1198,9 +1208,9 @@ class PlannerImpl { // Otherwise, we cannot reuse the buffer. const Node* producer_node = graph_viewer.GetProducerNode(p_input_arg->Name()); if (producer_node && HasExternalOutputs(*producer_node)) { - LOGS_DEFAULT(VERBOSE) << "Be noted input buffer " << p_output_arg->Name() << " of node " - << producer_node->Name() << " which has external outputs is reused. " - << "Be cautious the reuse MUST be a read-only usage."; + LOGS(logger_, VERBOSE) << "Be noted input buffer " << p_output_arg->Name() << " of node " + << producer_node->Name() << " which has external outputs is reused. " + << "Be cautious the reuse MUST be a read-only usage."; } #endif @@ -1241,9 +1251,9 @@ class PlannerImpl { // Otherwise, we cannot reuse the buffer. const Node* producer_node = graph_viewer.GetProducerNode(p_input_arg->Name()); if (producer_node && HasExternalOutputs(*producer_node)) { - LOGS_DEFAULT(VERBOSE) << "Be noted input buffer " << p_output_arg->Name() << " of node " - << producer_node->Name() << " which has external outputs is reused. " - << "Be cautious the reuse MUST be a read-only usage."; + LOGS(logger_, VERBOSE) << "Be noted input buffer " << p_output_arg->Name() << " of node " + << producer_node->Name() << " which has external outputs is reused. 
" + << "Be cautious the reuse MUST be a read-only usage."; } #endif @@ -1290,8 +1300,8 @@ class PlannerImpl { } } else { #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) - LOGS_DEFAULT(VERBOSE) << "Node " << node->Name() << " cannot reuse input buffer for node " - << producer_node->Name() << " as it has external outputs"; + LOGS(logger_, VERBOSE) << "Node " << node->Name() << " cannot reuse input buffer for node " + << producer_node->Name() << " as it has external outputs"; #endif } } @@ -1869,8 +1879,7 @@ class PlannerImpl { } #ifndef ORT_ENABLE_STREAM - void PartitionIntoStreams(const logging::Logger& /*logger*/, - const ExecutionProviders& /*execution_providers*/, + void PartitionIntoStreams(const ExecutionProviders& /*execution_providers*/, const PathString& /*partition_config_file*/) { if (graph_viewer_.NumberOfNodes() > 0) { stream_nodes_.push_back({}); @@ -1915,11 +1924,11 @@ class PlannerImpl { #else - void - PartitionIntoStreams(const logging::Logger& logger, const ExecutionProviders& execution_providers, - const PathString& partition_config_file) { - auto partitioner = IGraphPartitioner::CreateGraphPartitioner(logger, partition_config_file); - auto status = partitioner->PartitionGraph(graph_viewer_, execution_providers, stream_nodes_, context_->GetExecutionOrder()); + void PartitionIntoStreams(const ExecutionProviders& execution_providers, + const PathString& partition_config_file) { + auto partitioner = IGraphPartitioner::CreateGraphPartitioner(logger_, partition_config_file); + auto status = partitioner->PartitionGraph(graph_viewer_, execution_providers, stream_nodes_, + context_->GetExecutionOrder()); ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); plan_.node_stream_map_.resize(SafeInt(graph_viewer_.MaxNodeIndex()) + 1); for (size_t i = 0; i < stream_nodes_.size(); ++i) { @@ -2282,10 +2291,9 @@ Status PlannerImpl::CreatePlan( #ifdef ORT_ENABLE_STREAM const IStreamCommandHandleRegistry& stream_handle_registry, #endif - const PathString& partition_config_file, - const logging::Logger& logger) { + const PathString& partition_config_file) { // 1. partition graph into streams - PartitionIntoStreams(logger, execution_providers_, this->parent_node_ ? PathString{} : partition_config_file); + PartitionIntoStreams(execution_providers_, parent_node_ ? PathString{} : partition_config_file); // 2. 
initialize the plan based on stream partition result int num_ml_values = ort_value_name_idx_map_.MaxIdx() + 1; @@ -2354,14 +2362,13 @@ Status SequentialPlanner::CreatePlan( PlannerImpl planner(parent_node, graph_viewer, outer_scope_node_args, providers, kernel_create_info_map, subgraphs_kernel_create_info_maps, outer_scope_node_arg_to_location_map, - ort_value_name_idx_map, context, *plan); + ort_value_name_idx_map, context, *plan, logger); return planner.CreatePlan( #ifdef ORT_ENABLE_STREAM stream_handle_registry, #endif - partition_config_file, - logger); + partition_config_file); } #ifdef ORT_ENABLE_STREAM diff --git a/onnxruntime/core/framework/fallback_cpu_capability.cc b/onnxruntime/core/framework/fallback_cpu_capability.cc index ef68b88187e08..1eb7420b44d2c 100644 --- a/onnxruntime/core/framework/fallback_cpu_capability.cc +++ b/onnxruntime/core/framework/fallback_cpu_capability.cc @@ -41,7 +41,8 @@ static bool IsSmallInitializer(const onnxruntime::GraphViewer& graph, const Node std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewer& graph, const IExecutionProvider::IKernelLookup& kernel_lookup, - gsl::span tentative_nodes) { + gsl::span tentative_nodes, + const logging::Logger& logger) { // automatic conversion from const std::vector& const auto& ordered_nodes = graph.GetNodesInTopologicalOrder(); InlinedVector node_id_to_order_map(graph.MaxNodeIndex()); @@ -83,7 +84,7 @@ std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewe auto consumer_nodes = graph.GetConsumerNodes(node_arg.Name()); for (auto& consumer_node : consumer_nodes) { candidates.push(consumer_node->Index()); - LOGS_DEFAULT(INFO) << "Candidate for fallback CPU execution: " << consumer_node->Name(); + LOGS(logger, INFO) << "Candidate for fallback CPU execution: " << consumer_node->Name(); } } return Status::OK(); @@ -159,9 +160,9 @@ std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewe if (place_in_cpu) { cpu_nodes.insert(cur); - LOGS_DEFAULT(INFO) << "ORT optimization- Force fallback to CPU execution for node: " << node->Name() - << " because the CPU execution path is deemed faster than overhead involved with execution on other EPs " - << " capable of executing this node"; + LOGS(logger, INFO) << "ORT optimization- Force fallback to CPU execution for node: " << node->Name() + << " because the CPU execution path is deemed faster than overhead involved with execution " + "on other EPs capable of executing this node"; for (auto* output : node->OutputDefs()) { cpu_output_args.insert(output); } diff --git a/onnxruntime/core/framework/fallback_cpu_capability.h b/onnxruntime/core/framework/fallback_cpu_capability.h index c5bcd22888b7c..bca75adbfd5a7 100644 --- a/onnxruntime/core/framework/fallback_cpu_capability.h +++ b/onnxruntime/core/framework/fallback_cpu_capability.h @@ -9,6 +9,9 @@ #include "core/graph/graph_viewer.h" namespace onnxruntime { +namespace logging { +class Logger; +} /** Returns a list of nodes that are preferred on CPU. 
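// Illustrative sketch, not part of the patch: the recurring change in this series is to stop
// logging through the process-wide default logger (LOGS_DEFAULT) and instead log through a
// caller-supplied logging::Logger, so messages honor the owning session's severity and sinks.
// "ReuseAdvisor" and its message are hypothetical stand-ins; the Logger type and LOGS macro
// are assumed to come from core/common/logging/logging.h, as used by the hunks above.
#include <string>

#include "core/common/logging/logging.h"

namespace onnxruntime {
class ReuseAdvisor {
 public:
  explicit ReuseAdvisor(const logging::Logger& logger) : logger_(logger) {}

  void WarnReadOnlyReuse(const std::string& producer, const std::string& consumer) const {
    // Before this change the equivalent code wrote LOGS_DEFAULT(VERBOSE) << ...;
    LOGS(logger_, VERBOSE) << consumer << " reuses a buffer produced by " << producer
                           << "; the reuse must be read-only.";
  }

 private:
  const logging::Logger& logger_;  // non-owning; the session logger outlives graph planning
};
}  // namespace onnxruntime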
@@ -19,6 +22,7 @@ namespace onnxruntime { */ std::unordered_set GetCpuPreferredNodes(const GraphViewer& graph, const IExecutionProvider::IKernelLookup& kernel_lookup, - gsl::span tentative_nodes); + gsl::span tentative_nodes, + const logging::Logger& logger); } // namespace onnxruntime diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc index 6174122cf3cb4..406fc1b15effc 100644 --- a/onnxruntime/core/framework/graph_partitioner.cc +++ b/onnxruntime/core/framework/graph_partitioner.cc @@ -149,13 +149,13 @@ auto get_capabilities = [](const IExecutionProvider& ep, }; } // namespace -static Status GetCapabilityForEP(const GetCapabilityForEPParams& params) { +static Status GetCapabilityForEP(const GetCapabilityForEPParams& params, const logging::Logger& logger) { auto& current_ep = params.current_ep.get(); const auto& ep_type = current_ep.Type(); #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) if (current_ep.GetPreferredLayout() == DataLayout::NHWC && !params.transform_layout.get()) { - LOGS_DEFAULT(WARNING) << ep_type << " cannot be used with this model due to its ONNX opset not being supported by " + LOGS(logger, WARNING) << ep_type << " cannot be used with this model due to its ONNX opset not being supported by " "the layout transformer."; return Status::OK(); } @@ -165,7 +165,8 @@ static Status GetCapabilityForEP(const GetCapabilityForEPParams& params) { const auto kernel_registries_for_ep = kernel_registry_mgr.GetKernelRegistriesByProviderType(ep_type); const KernelLookup kernel_lookup{ep_type, kernel_registries_for_ep, - kernel_registry_mgr.GetKernelTypeStrResolver()}; + kernel_registry_mgr.GetKernelTypeStrResolver(), + logger}; auto& graph = params.graph.get(); auto& capabilities = params.capabilities.get(); @@ -248,13 +249,15 @@ static Status GetCapabilityForEP(const GetCapabilityForEPParams& params) { static Status GetCapabilityForEPForAotInlining(const GraphViewer& graph_viewer, const KernelRegistryManager& kernel_registry_mgr, const IExecutionProvider& current_ep, + const logging::Logger& logger, std::vector>& capabilities) { const auto& ep_type = current_ep.Type(); const auto kernel_registries_for_ep = kernel_registry_mgr.GetKernelRegistriesByProviderType(ep_type); const KernelLookup kernel_lookup{ep_type, kernel_registries_for_ep, - kernel_registry_mgr.GetKernelTypeStrResolver()}; + kernel_registry_mgr.GetKernelTypeStrResolver(), + logger}; // TODO: Provide EP with a capability to look inside the functions. capabilities = get_capabilities(current_ep, graph_viewer, kernel_lookup); @@ -359,7 +362,8 @@ static Status PartitionOnnxFormatModelImpl(Graph& graph, FuncManager& func_mgr, GraphPartitioner::Mode mode, int& fused_node_unique_id, const layout_transformation::TransformLayoutFunction& transform_layout_fn, - const layout_transformation::DebugGraphFn& debug_graph_fn) { + const layout_transformation::DebugGraphFn& debug_graph_fn, + const logging::Logger& logger) { // handle testing edge case where optimizers or constant lifting results in graph with no nodes. 
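// Illustrative sketch, not part of the patch: KernelLookup now takes the logger as its fourth
// constructor argument, matching the GetCapabilityForEP call sites updated above. A hedged
// sketch of that construction; "FindKernelForNode" is a hypothetical wrapper, and the
// ORT-internal headers and types are assumed from this repository.
#include "core/framework/kernel_lookup.h"
#include "core/framework/kernel_registry_manager.h"

namespace onnxruntime {
const KernelCreateInfo* FindKernelForNode(const Node& node,
                                          ProviderType ep_type,
                                          const KernelRegistryManager& kernel_registry_mgr,
                                          const logging::Logger& logger) {
  const auto registries = kernel_registry_mgr.GetKernelRegistriesByProviderType(ep_type);
  const KernelLookup lookup{ep_type, registries,
                            kernel_registry_mgr.GetKernelTypeStrResolver(), logger};
  return lookup.LookUpKernel(node);  // nullptr when no registry for this EP matches the node
}
}  // namespace onnxruntime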
// doing it here saves all providers checking for this in GetCapability if (graph.NumberOfNodes() == 0) { @@ -373,7 +377,7 @@ static Status PartitionOnnxFormatModelImpl(Graph& graph, FuncManager& func_mgr, // we pass through the FuncManager from the top level graph ORT_RETURN_IF_ERROR(PartitionOnnxFormatModelImpl(*subgraph, func_mgr, kernel_registry_mgr, fused_kernel_registry, current_ep, mode, fused_node_unique_id, - transform_layout_fn, debug_graph_fn)); + transform_layout_fn, debug_graph_fn, logger)); } } @@ -398,7 +402,7 @@ static Status PartitionOnnxFormatModelImpl(Graph& graph, FuncManager& func_mgr, std::cref(transform_layout_fn), std::cref(debug_graph_fn)}; - ORT_RETURN_IF_ERROR(GetCapabilityForEP(get_capability_params)); + ORT_RETURN_IF_ERROR(GetCapabilityForEP(get_capability_params, logger)); if (capabilities.empty()) { return Status::OK(); } @@ -425,7 +429,7 @@ static Status PartitionOnnxFormatModelImpl(Graph& graph, FuncManager& func_mgr, Node* n = PlaceNode(graph, *capability->sub_graph, fusion_style, type, mode, fused_node_unique_id); if (n != nullptr) { // searching in kernel registries, if no kernel registered for the fused_node, use compile approach - if (!KernelRegistryManager::HasImplementationOf(kernel_registry_mgr, *n, type)) { + if (!KernelRegistryManager::HasImplementationOf(kernel_registry_mgr, *n, type, logger)) { nodes_to_compile.push_back(n); capabilities_to_compile.push_back(std::move(capability)); } else { @@ -559,6 +563,7 @@ static Status InlineNodes(Graph& graph, bool& modified_graph) { static Status InlineFunctionsAOTImpl(const ExecutionProviders& execution_providers, const KernelRegistryManager& kernel_registry_mgr, Graph& graph, + const logging::Logger& logger, InlinedHashSet& not_inlined, size_t& inlined_count) { // handle testing edge case where optimizers or constant lifting results in graph with no nodes. 
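// Illustrative sketch, not part of the patch: PartitionOnnxFormatModelImpl and
// InlineFunctionsAOTImpl both recurse into subgraphs and now forward the same logger
// reference at every level instead of reaching for LOGS_DEFAULT inside the recursion.
// A self-contained toy of that shape; GraphNode, Logger, and Visit are stand-ins, not ORT types.
#include <iostream>
#include <vector>

struct Logger {
  void Info(const char* msg) const { std::cout << msg << '\n'; }
};

struct GraphNode {
  std::vector<GraphNode> subgraphs;  // nested graphs, as with ONNX If/Loop bodies
};

void Visit(const GraphNode& g, const Logger& logger) {
  logger.Info("visiting graph");  // every level logs through the injected logger
  for (const auto& sub : g.subgraphs) {
    Visit(sub, logger);  // the reference is forwarded, never re-created per level
  }
}

int main() {
  GraphNode root{{GraphNode{}, GraphNode{{GraphNode{}}}}};
  Logger logger;
  Visit(root, logger);
  return 0;
}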
@@ -574,6 +579,7 @@ static Status InlineFunctionsAOTImpl(const ExecutionProviders& execution_provide ORT_RETURN_IF_ERROR(InlineFunctionsAOTImpl(execution_providers, kernel_registry_mgr, *subgraph, + logger, not_inlined, inlined_count)); } @@ -597,7 +603,8 @@ static Status InlineFunctionsAOTImpl(const ExecutionProviders& execution_provide InlinedHashSet claimed_by_ep; for (const auto& ep : execution_providers) { std::vector> capabilities; - ORT_RETURN_IF_ERROR(GetCapabilityForEPForAotInlining(graph_viewer, kernel_registry_mgr, *ep, capabilities)); + ORT_RETURN_IF_ERROR(GetCapabilityForEPForAotInlining(graph_viewer, kernel_registry_mgr, *ep, logger, + capabilities)); for (auto& capability : capabilities) { const auto& nodes = capability->sub_graph->nodes; if (nodes.size() == 1) { @@ -727,7 +734,8 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers static Status PartitionOnnxFormatModel(const PartitionParams& partition_params, GraphPartitioner::Mode mode, const ExecutionProviders& execution_providers, - KernelRegistryManager& kernel_registry_manager) { + KernelRegistryManager& kernel_registry_manager, + const logging::Logger& logger) { bool modified_graph = false; auto& graph = partition_params.graph.get(); @@ -742,7 +750,8 @@ static Status PartitionOnnxFormatModel(const PartitionParams& partition_params, ORT_RETURN_IF_ERROR(PartitionOnnxFormatModelImpl(graph, func_mgr, kernel_registry_manager, fused_kernel_registry, *ep, mode, fused_node_unique_id, transform_layout_function, - partition_params.debug_graph_fn)); + partition_params.debug_graph_fn, + logger)); } // expand any nodes that have an ONNX function definition but no matching ORT kernel. @@ -762,7 +771,8 @@ static Status PartitionOnnxFormatModel(const PartitionParams& partition_params, static Status PartitionOrtFormatModelImpl(const PartitionParams& partition_params, KernelRegistryManager& kernel_registry_mgr, - IExecutionProvider& current_ep) { + IExecutionProvider& current_ep, + const logging::Logger& logger) { // handle testing edge case where optimizers or constant lifting results in graph with no nodes. // doing it here saves all providers checking for this in GetCapability auto& graph = partition_params.graph.get(); @@ -776,7 +786,8 @@ static Status PartitionOrtFormatModelImpl(const PartitionParams& partition_param auto& subgraph = *entry.second; PartitionParams subgraph_partition_params = partition_params; subgraph_partition_params.graph = std::ref(subgraph); - ORT_RETURN_IF_ERROR(PartitionOrtFormatModelImpl(subgraph_partition_params, kernel_registry_mgr, current_ep)); + ORT_RETURN_IF_ERROR(PartitionOrtFormatModelImpl(subgraph_partition_params, kernel_registry_mgr, current_ep, + logger)); } } @@ -795,7 +806,7 @@ static Status PartitionOrtFormatModelImpl(const PartitionParams& partition_param }; // clang-format on - ORT_RETURN_IF_ERROR(GetCapabilityForEP(get_capability_params)); + ORT_RETURN_IF_ERROR(GetCapabilityForEP(get_capability_params, logger)); if (capabilities.empty()) { return Status::OK(); } @@ -876,10 +887,11 @@ static Status PartitionOrtFormatModelImpl(const PartitionParams& partition_param // Simplified partitioning where custom EPs may produce compiled nodes. 
static Status PartitionOrtFormatModel(const PartitionParams& partition_params, const ExecutionProviders& execution_providers, - KernelRegistryManager& kernel_registry_manager) { + KernelRegistryManager& kernel_registry_manager, + const logging::Logger& logger) { // process full graph with each EP for (const auto& ep : execution_providers) { - ORT_RETURN_IF_ERROR(PartitionOrtFormatModelImpl(partition_params, kernel_registry_manager, *ep)); + ORT_RETURN_IF_ERROR(PartitionOrtFormatModelImpl(partition_params, kernel_registry_manager, *ep, logger)); } return Status::OK(); @@ -906,6 +918,7 @@ Status GraphPartitioner::InlineFunctionsAOT(Model& model, ORT_RETURN_IF_ERROR(InlineFunctionsAOTImpl(execution_providers, kernel_registry_manager, graph, + logger, not_inlined, inlined_count)); @@ -977,8 +990,7 @@ Status GraphPartitioner::Partition(Graph& graph, FuncManager& func_mgr, if (mode == Mode::kNormal || mode == Mode::kAssignOnly) { #if !defined(ORT_MINIMAL_BUILD) - ORT_RETURN_IF_ERROR(PartitionOnnxFormatModel(partition_params, mode, - providers_, kernel_registry_mgr_)); + ORT_RETURN_IF_ERROR(PartitionOnnxFormatModel(partition_params, mode, providers_, kernel_registry_mgr_, logger)); bool ep_context_enabled = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1"; std::string ep_context_path = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); @@ -991,8 +1003,7 @@ Status GraphPartitioner::Partition(Graph& graph, FuncManager& func_mgr, return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "ONNX models are not supported in this build."); #endif //! defined(ORT_MINIMAL_BUILD) } else { - ORT_RETURN_IF_ERROR(PartitionOrtFormatModel(partition_params, - providers_, kernel_registry_mgr_)); + ORT_RETURN_IF_ERROR(PartitionOrtFormatModel(partition_params, providers_, kernel_registry_mgr_, logger)); } #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) diff --git a/onnxruntime/core/framework/kernel_lookup.h b/onnxruntime/core/framework/kernel_lookup.h index 0dd17d2f4a624..fac43bad0fefb 100644 --- a/onnxruntime/core/framework/kernel_lookup.h +++ b/onnxruntime/core/framework/kernel_lookup.h @@ -21,17 +21,19 @@ class KernelLookup final : public IExecutionProvider::IKernelLookup { public: KernelLookup(ProviderType provider_type, gsl::span> kernel_registries, - const IKernelTypeStrResolver& kernel_type_str_resolver) + const IKernelTypeStrResolver& kernel_type_str_resolver, + const logging::Logger& logger) : provider_type_{provider_type}, kernel_registries_{kernel_registries}, - kernel_type_str_resolver_{kernel_type_str_resolver} { + kernel_type_str_resolver_{kernel_type_str_resolver}, + logger_{logger} { ORT_ENFORCE(!provider_type_.empty(), "provider_type must be specified."); } const KernelCreateInfo* LookUpKernel(const Node& node) const override { const KernelCreateInfo* kernel_create_info{}; for (const auto& registry : kernel_registries_) { - const auto lookup_status = registry->TryFindKernel(node, provider_type_, kernel_type_str_resolver_, + const auto lookup_status = registry->TryFindKernel(node, provider_type_, kernel_type_str_resolver_, logger_, &kernel_create_info); if (lookup_status.IsOK() && kernel_create_info != nullptr) { return kernel_create_info; @@ -45,6 +47,7 @@ class KernelLookup final : public IExecutionProvider::IKernelLookup { ProviderType provider_type_; const gsl::span> kernel_registries_; const IKernelTypeStrResolver& kernel_type_str_resolver_; + const logging::Logger& logger_; }; } // namespace onnxruntime diff --git 
a/onnxruntime/core/framework/kernel_registry.cc b/onnxruntime/core/framework/kernel_registry.cc index cbbe0f86b8b7e..8602a3b4004ff 100644 --- a/onnxruntime/core/framework/kernel_registry.cc +++ b/onnxruntime/core/framework/kernel_registry.cc @@ -183,6 +183,7 @@ Status KernelRegistry::TryFindKernelImpl(const Node& node, ProviderType exec_provider, const IKernelTypeStrResolver* kernel_type_str_resolver, const TypeConstraintMap* type_constraints, + const logging::Logger& logger, const KernelCreateInfo** out) const { const auto& node_provider = node.GetExecutionProviderType(); const auto& expected_provider = (node_provider.empty() ? exec_provider : node_provider); @@ -215,7 +216,7 @@ Status KernelRegistry::TryFindKernelImpl(const Node& node, std::ostream_iterator(oss, "\n")); oss << ")"; - VLOGS_DEFAULT(2) << "TryFindKernel failed, Reason: " << oss.str(); + VLOGS(logger, 2) << "TryFindKernel failed, Reason: " << oss.str(); return Status(common::ONNXRUNTIME, common::FAIL, oss.str()); } @@ -224,14 +225,16 @@ Status KernelRegistry::TryFindKernelImpl(const Node& node, Status KernelRegistry::TryFindKernel(const Node& node, ProviderType exec_provider, const IKernelTypeStrResolver& kernel_type_str_resolver, + const logging::Logger& logger, const KernelCreateInfo** out) const { - return TryFindKernelImpl(node, exec_provider, &kernel_type_str_resolver, nullptr, out); + return TryFindKernelImpl(node, exec_provider, &kernel_type_str_resolver, nullptr, logger, out); } Status KernelRegistry::TryFindKernel(const Node& node, ProviderType exec_provider, const TypeConstraintMap& type_constraints, + const logging::Logger& logger, const KernelCreateInfo** out) const { - return TryFindKernelImpl(node, exec_provider, nullptr, &type_constraints, out); + return TryFindKernelImpl(node, exec_provider, nullptr, &type_constraints, logger, out); } static bool KernelDefCompatible(int version, const KernelDef& kernel_def, @@ -261,6 +264,7 @@ Status KernelRegistry::TryFindKernel(ProviderType exec_provider, std::string_view domain, int version, const KernelRegistry::TypeConstraintMap& type_constraints, + const logging::Logger& logger, const KernelCreateInfo** out) const { auto range = kernel_creator_fn_map_.equal_range(GetMapKey(op_type, domain, exec_provider)); if (out) *out = nullptr; @@ -289,7 +293,7 @@ Status KernelRegistry::TryFindKernel(ProviderType exec_provider, std::ostream_iterator(oss, "\n")); oss << ")"; - VLOGS_DEFAULT(2) << "TryFindKernel failed, Reason: " << oss.str(); + VLOGS(logger, 2) << "TryFindKernel failed, Reason: " << oss.str(); return Status(common::ONNXRUNTIME, common::FAIL, oss.str()); } diff --git a/onnxruntime/core/framework/kernel_registry_manager.cc b/onnxruntime/core/framework/kernel_registry_manager.cc index f8ccdb8fb0238..721353854a474 100644 --- a/onnxruntime/core/framework/kernel_registry_manager.cc +++ b/onnxruntime/core/framework/kernel_registry_manager.cc @@ -57,7 +57,7 @@ void KernelRegistryManager::RegisterKernelRegistry(std::shared_ptrTryFindKernel(node, std::string(), GetKernelTypeStrResolver(), kernel_create_info); + status = registry->TryFindKernel(node, std::string(), GetKernelTypeStrResolver(), logger, kernel_create_info); if (status.IsOK()) { return status; } @@ -95,7 +95,7 @@ Status KernelRegistryManager::SearchKernelRegistry(const Node& node, } if (p != nullptr) { - status = p->TryFindKernel(node, std::string(), GetKernelTypeStrResolver(), kernel_create_info); + status = p->TryFindKernel(node, std::string(), GetKernelTypeStrResolver(), logger, kernel_create_info); if 
(status.IsOK()) { return status; } @@ -104,10 +104,14 @@ Status KernelRegistryManager::SearchKernelRegistry(const Node& node, return Status(ONNXRUNTIME, NOT_IMPLEMENTED, create_error_message("Failed to find kernel for ")); } -bool KernelRegistryManager::HasImplementationOf(const KernelRegistryManager& r, const Node& node, const std::string& provider_type) { +bool KernelRegistryManager::HasImplementationOf(const KernelRegistryManager& r, + const Node& node, + const std::string& provider_type, + const logging::Logger& logger) { const auto kernel_registries = r.GetKernelRegistriesByProviderType(provider_type); return std::any_of(kernel_registries.begin(), kernel_registries.end(), [&](const KernelRegistry* kernel_registry) { - return KernelRegistry::HasImplementationOf(*kernel_registry, node, provider_type, r.GetKernelTypeStrResolver()); + return KernelRegistry::HasImplementationOf(*kernel_registry, node, provider_type, r.GetKernelTypeStrResolver(), + logger); }); } diff --git a/onnxruntime/core/framework/kernel_registry_manager.h b/onnxruntime/core/framework/kernel_registry_manager.h index 1da73208cb536..72f0ed3c6268a 100644 --- a/onnxruntime/core/framework/kernel_registry_manager.h +++ b/onnxruntime/core/framework/kernel_registry_manager.h @@ -67,13 +67,14 @@ class KernelRegistryManager { // This function assumes the node is already assigned to an execution provider // Don't call this function before graph partition is done - Status SearchKernelRegistry(const Node& node, + Status SearchKernelRegistry(const Node& node, const logging::Logger& logger, /*out*/ const KernelCreateInfo** kernel_create_info) const; /** * Whether this node can be run on this provider */ - static bool HasImplementationOf(const KernelRegistryManager& r, const Node& node, const std::string& provider_type); + static bool HasImplementationOf(const KernelRegistryManager& r, const Node& node, const std::string& provider_type, + const logging::Logger& logger); Status CreateKernel(const Node& node, const IExecutionProvider& execution_provider, diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc index 0d0b22ff61e01..0ac2271ba09f1 100644 --- a/onnxruntime/core/framework/session_state.cc +++ b/onnxruntime/core/framework/session_state.cc @@ -178,7 +178,7 @@ Status SessionState::PopulateKernelCreateInfo(const KernelRegistryManager& kerne bool saving_ort_format) { for (auto& node : graph_.Nodes()) { const KernelCreateInfo* kci = nullptr; - auto status = kernel_registry_manager.SearchKernelRegistry(node, &kci); + auto status = kernel_registry_manager.SearchKernelRegistry(node, logger_, &kci); if (!status.IsOK() && saving_ort_format) { // if we didn't find the kernel and are saving to ORT format an EP that compiles nodes is enabled. // in that case we assigned the node to that EP but do not compile it into a fused node. @@ -187,7 +187,7 @@ Status SessionState::PopulateKernelCreateInfo(const KernelRegistryManager& kerne // at runtime when the model is loaded in a minimal build, the compiling EP will replace this node if possible. // if that's not possible for some reason we can fallback to the CPU EP implementation. 
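// Illustrative sketch, not part of the patch: call sites that previously wrote
// SearchKernelRegistry(node, &kci) now pass the logger between the node and the output
// pointer, as session_state.cc does above. "FindKernelOrFail" is a hypothetical wrapper;
// the ORT headers, Status macros, and LOGS macro are assumed from this repository.
#include "core/common/common.h"
#include "core/common/logging/logging.h"
#include "core/framework/kernel_registry_manager.h"

namespace onnxruntime {
Status FindKernelOrFail(const KernelRegistryManager& registries,
                        const Node& node,
                        const logging::Logger& logger) {
  const KernelCreateInfo* kci = nullptr;
  // Updated signature: SearchKernelRegistry(const Node&, const logging::Logger&, const KernelCreateInfo**)
  ORT_RETURN_IF_ERROR(registries.SearchKernelRegistry(node, logger, &kci));
  LOGS(logger, VERBOSE) << "Found kernel for " << node.OpType();
  return Status::OK();
}
}  // namespace onnxruntime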
node.SetExecutionProviderType(kCpuExecutionProvider); - status = kernel_registry_manager.SearchKernelRegistry(node, &kci); + status = kernel_registry_manager.SearchKernelRegistry(node, logger_, &kci); } ORT_RETURN_IF_ERROR(status); diff --git a/onnxruntime/core/optimizer/constant_folding.cc b/onnxruntime/core/optimizer/constant_folding.cc index 1466de51d0b99..e755b4bfa6364 100644 --- a/onnxruntime/core/optimizer/constant_folding.cc +++ b/onnxruntime/core/optimizer/constant_folding.cc @@ -227,11 +227,12 @@ Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level, #if !defined(DISABLE_SPARSE_TENSORS) // Create execution frame for executing constant nodes. OptimizerExecutionFrame::Info info({node}, constant_inputs, graph.ModelPath(), execution_provider_, - is_sparse_initializer_check); + is_sparse_initializer_check, logger); #else // Create execution frame for executing constant nodes. - OptimizerExecutionFrame::Info info({node}, constant_inputs, graph.ModelPath(), execution_provider_, - [](std::string const&) { return false; }); + OptimizerExecutionFrame::Info info( + {node}, constant_inputs, graph.ModelPath(), execution_provider_, [](const std::string&) { return false; }, + logger); #endif std::vector fetch_mlvalue_idxs; diff --git a/onnxruntime/core/optimizer/graph_transformer_utils.cc b/onnxruntime/core/optimizer/graph_transformer_utils.cc index 2f2524420dc44..ba2b87b5aa0ca 100644 --- a/onnxruntime/core/optimizer/graph_transformer_utils.cc +++ b/onnxruntime/core/optimizer/graph_transformer_utils.cc @@ -190,6 +190,7 @@ InlinedVector> GenerateTransformers( TransformerLevel level, const SessionOptions& session_options, const IExecutionProvider& cpu_execution_provider, /*required by constant folding*/ + const logging::Logger& logger, const InlinedHashSet& rules_and_transformers_to_disable, [[maybe_unused]] concurrency::ThreadPool* intra_op_thread_pool, std::unordered_map>* p_buffered_tensors) { @@ -404,7 +405,8 @@ InlinedVector> GenerateTransformers( } auto cpu_registry = cpu_execution_provider.GetKernelRegistry(); - auto nhwc_transformer = std::make_unique(std::move(cpu_allocator), std::move(cpu_registry)); + auto nhwc_transformer = std::make_unique(std::move(cpu_allocator), std::move(cpu_registry), + logger); if (nhwc_transformer->IsActive()) { transformers.emplace_back(std::move(nhwc_transformer)); } @@ -437,6 +439,7 @@ InlinedVector> GenerateTransformersForMinimalB const SessionOptions& session_options, const SatApplyContextVariant& apply_context, const IExecutionProvider& cpu_execution_provider, + const logging::Logger& logger, const InlinedHashSet& rules_and_transformers_to_disable, [[maybe_unused]] concurrency::ThreadPool* intra_op_thread_pool, std::unordered_map>* p_buffered_tensors) { @@ -490,7 +493,8 @@ InlinedVector> GenerateTransformersForMinimalB #ifndef DISABLE_CONTRIB_OPS AllocatorPtr cpu_allocator = std::make_shared(); auto cpu_registry = cpu_execution_provider.GetKernelRegistry(); - auto nhwc_transformer = std::make_unique(std::move(cpu_allocator), std::move(cpu_registry)); + auto nhwc_transformer = std::make_unique(std::move(cpu_allocator), std::move(cpu_registry), + logger); if (nhwc_transformer->IsActive()) { transformers.emplace_back(std::move(nhwc_transformer)); } diff --git a/onnxruntime/core/optimizer/insert_cast_transformer.cc b/onnxruntime/core/optimizer/insert_cast_transformer.cc index 67ebc22dab41d..b1665c7172549 100644 --- a/onnxruntime/core/optimizer/insert_cast_transformer.cc +++ b/onnxruntime/core/optimizer/insert_cast_transformer.cc @@ 
-84,7 +84,9 @@ static bool NodeNeedsInputCastToFp32(const onnxruntime::Node& node) { // going to a node that will need a Cast. // // Return true if all the fp16 inputs and outputs are connected to nodes that will be cast to fp32. -static bool IsIsolatedFp16NodeOnCpu(const onnxruntime::Node& node, onnxruntime::Graph& graph, const KernelRegistry& cpu_kernel_registry) { +static bool IsIsolatedFp16NodeOnCpu(const onnxruntime::Node& node, onnxruntime::Graph& graph, + const KernelRegistry& cpu_kernel_registry, + const logging::Logger& logger) { // we can check if it's an isolated fp16 node // if node has input coming from other nodes (only consuming graph inputs or initializers if it doesn't), // does not have a subgraph (would have to alter subgraph inputs if we cast the input to this node), @@ -211,7 +213,7 @@ static bool IsIsolatedFp16NodeOnCpu(const onnxruntime::Node& node, onnxruntime:: const KernelCreateInfo* kernel_create_info{}; const auto lookup_status = cpu_kernel_registry.TryFindKernel( kCpuExecutionProvider, node.OpType(), node.Domain(), - node.SinceVersion(), type_constraint_map, &kernel_create_info); + node.SinceVersion(), type_constraint_map, logger, &kernel_create_info); if (lookup_status.IsOK() && kernel_create_info != nullptr) { return true; } @@ -220,9 +222,10 @@ static bool IsIsolatedFp16NodeOnCpu(const onnxruntime::Node& node, onnxruntime:: return false; } -static Status ForceSingleNodeCPUFloat16ToFloat32(onnxruntime::Graph& graph, const KernelRegistry& cpu_kernel_registry) { +static Status ForceSingleNodeCPUFloat16ToFloat32(onnxruntime::Graph& graph, const KernelRegistry& cpu_kernel_registry, + const logging::Logger& logger) { for (auto& node : graph.Nodes()) { - if (IsIsolatedFp16NodeOnCpu(node, graph, cpu_kernel_registry)) { + if (IsIsolatedFp16NodeOnCpu(node, graph, cpu_kernel_registry, logger)) { // unassign the node so that NeedInsertCast will return true for it, forcing it to fp32 node.SetExecutionProviderType(""); } @@ -319,7 +322,8 @@ class RemoveDuplicateCastTransformer : public GraphTransformer { return dst_bit_length <= src_bit_length; } - if ((*src_type == "tensor(float16)" && *dst_type == "tensor(bfloat16)") || (*src_type == "tensor(bfloat16)" && *dst_type == "tensor(float16)")) { + if ((*src_type == "tensor(float16)" && *dst_type == "tensor(bfloat16)") || + (*src_type == "tensor(bfloat16)" && *dst_type == "tensor(float16)")) { return true; } @@ -453,7 +457,7 @@ class RemoveDuplicateCastTransformer : public GraphTransformer { Status InsertCastTransformer::ApplyImpl(onnxruntime::Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const { if (force_cpu_fp32_) - ORT_RETURN_IF_ERROR(ForceSingleNodeCPUFloat16ToFloat32(graph, *cpu_kernel_registries_)); + ORT_RETURN_IF_ERROR(ForceSingleNodeCPUFloat16ToFloat32(graph, *cpu_kernel_registries_, logger)); GraphViewer graph_viewer(graph); auto& order = graph_viewer.GetNodesInTopologicalOrder(); diff --git a/onnxruntime/core/optimizer/nhwc_transformer.cc b/onnxruntime/core/optimizer/nhwc_transformer.cc index ee79fa620374e..cd654991c92d5 100644 --- a/onnxruntime/core/optimizer/nhwc_transformer.cc +++ b/onnxruntime/core/optimizer/nhwc_transformer.cc @@ -44,7 +44,9 @@ NhwcConvLookup( return &(iter->second); } -NhwcTransformer::NhwcTransformer(AllocatorPtr cpu_allocator, std::shared_ptr cpu_kernel_registry) noexcept +NhwcTransformer::NhwcTransformer(AllocatorPtr cpu_allocator, + std::shared_ptr cpu_kernel_registry, + const logging::Logger& logger) noexcept : GraphTransformer("NhwcTransformer"), 
cpu_allocator_(std::move(cpu_allocator)) { if (!cpu_kernel_registry) { // This is a CPU op nodes optimizer, not useful if cpu EP is not available. @@ -64,7 +66,7 @@ NhwcTransformer::NhwcTransformer(AllocatorPtr cpu_allocator, std::shared_ptrTryFindKernel( kCpuExecutionProvider, qconv_int8.op_type_, qconv_int8.domain_, - qconv_int8.version_, qconv_int8.type_constraints_, &kernel_create_info); + qconv_int8.version_, qconv_int8.type_constraints_, logger, &kernel_create_info); if (status.IsOK() && kernel_create_info != nullptr) { kernel_create_info = nullptr; conv_table_.emplace( @@ -83,7 +85,7 @@ NhwcTransformer::NhwcTransformer(AllocatorPtr cpu_allocator, std::shared_ptrTryFindKernel( kCpuExecutionProvider, qconv_uint8.op_type_, qconv_uint8.domain_, - qconv_uint8.version_, qconv_uint8.type_constraints_, &kernel_create_info); + qconv_uint8.version_, qconv_uint8.type_constraints_, logger, &kernel_create_info); if (status.IsOK() && kernel_create_info != nullptr) { kernel_create_info = nullptr; conv_table_.emplace( @@ -103,7 +105,7 @@ NhwcTransformer::NhwcTransformer(AllocatorPtr cpu_allocator, std::shared_ptrTryFindKernel( kCpuExecutionProvider, nhwc_conv_fp16.op_type_, nhwc_conv_fp16.domain_, - nhwc_conv_fp16.version_, nhwc_conv_fp16.type_constraints_, &kernel_create_info); + nhwc_conv_fp16.version_, nhwc_conv_fp16.type_constraints_, logger, &kernel_create_info); if (status.IsOK() && kernel_create_info != nullptr) { kernel_create_info = nullptr; conv_table_.emplace( @@ -123,7 +125,7 @@ NhwcTransformer::NhwcTransformer(AllocatorPtr cpu_allocator, std::shared_ptrTryFindKernel( kCpuExecutionProvider, nhwc_maxpool_fp16.op_type_, nhwc_maxpool_fp16.domain_, - nhwc_maxpool_fp16.version_, nhwc_maxpool_fp16.type_constraints_, &kernel_create_info); + nhwc_maxpool_fp16.version_, nhwc_maxpool_fp16.type_constraints_, logger, &kernel_create_info); if (status.IsOK() && kernel_create_info != nullptr) { kernel_create_info = nullptr; conv_table_.emplace( @@ -140,7 +142,7 @@ NhwcTransformer::NhwcTransformer(AllocatorPtr cpu_allocator, std::shared_ptrTryFindKernel( kCpuExecutionProvider, nhwc_avgpool_fp16.op_type_, nhwc_avgpool_fp16.domain_, - nhwc_avgpool_fp16.version_, nhwc_avgpool_fp16.type_constraints_, &kernel_create_info); + nhwc_avgpool_fp16.version_, nhwc_avgpool_fp16.type_constraints_, logger, &kernel_create_info); if (status.IsOK() && kernel_create_info != nullptr) { kernel_create_info = nullptr; conv_table_.emplace( @@ -157,7 +159,7 @@ NhwcTransformer::NhwcTransformer(AllocatorPtr cpu_allocator, std::shared_ptrTryFindKernel( kCpuExecutionProvider, nhwc_gavgpool_fp16.op_type_, nhwc_gavgpool_fp16.domain_, - nhwc_gavgpool_fp16.version_, nhwc_gavgpool_fp16.type_constraints_, &kernel_create_info); + nhwc_gavgpool_fp16.version_, nhwc_gavgpool_fp16.type_constraints_, logger, &kernel_create_info); if (status.IsOK() && kernel_create_info != nullptr) { kernel_create_info = nullptr; conv_table_.emplace( diff --git a/onnxruntime/core/optimizer/nhwc_transformer.h b/onnxruntime/core/optimizer/nhwc_transformer.h index 000732060b889..c65f851fdab9d 100644 --- a/onnxruntime/core/optimizer/nhwc_transformer.h +++ b/onnxruntime/core/optimizer/nhwc_transformer.h @@ -75,7 +75,8 @@ and inserts nodes to transpose tensors as needed. 
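// Illustrative sketch, not part of the patch: the registry-level TryFindKernel overload that
// NhwcTransformer uses to probe for CPU kernels now takes the logger just before the output
// pointer. "CpuKernelExists" is a hypothetical helper and the op type / domain / version /
// constraints below are placeholders; only the argument order mirrors the call sites above.
#include "core/framework/kernel_registry.h"
#include "core/graph/constants.h"

namespace onnxruntime {
bool CpuKernelExists(const KernelRegistry& cpu_registry,
                     const KernelRegistry::TypeConstraintMap& type_constraints,
                     const logging::Logger& logger) {
  const KernelCreateInfo* kci = nullptr;
  const auto status = cpu_registry.TryFindKernel(kCpuExecutionProvider, "QLinearConv", kMSDomain,
                                                 /*version*/ 1, type_constraints, logger, &kci);
  return status.IsOK() && kci != nullptr;  // same check the transformer performs per op
}
}  // namespace onnxruntime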
class NhwcTransformer : public GraphTransformer { private: public: - explicit NhwcTransformer(AllocatorPtr cpu_allocator, std::shared_ptr cpu_kernel_registry) noexcept; + explicit NhwcTransformer(AllocatorPtr cpu_allocator, std::shared_ptr cpu_kernel_registry, + const logging::Logger& logger) noexcept; /** * @brief Usually called right after constructor, it shows whether diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.cc b/onnxruntime/core/optimizer/optimizer_execution_frame.cc index ed7d5feb2beb3..b2e8e491c361c 100644 --- a/onnxruntime/core/optimizer/optimizer_execution_frame.cc +++ b/onnxruntime/core/optimizer/optimizer_execution_frame.cc @@ -32,9 +32,11 @@ OptimizerExecutionFrame::Info::Info(const std::vector& nodes, const InitializedTensorSet& initialized_tensor_set, const std::filesystem::path& model_path, const IExecutionProvider& execution_provider, - const std::function& is_sparse_initializer_func) + const std::function& is_sparse_initializer_func, + const logging::Logger& logger) : execution_provider_(execution_provider), - is_sparse_initializer_func_(is_sparse_initializer_func) { + is_sparse_initializer_func_(is_sparse_initializer_func), + logger_(logger) { allocator_ptr_ = std::make_shared(); ORT_ENFORCE(allocator_ptr_, "Failed to get allocator for optimizer"); @@ -79,9 +81,11 @@ OptimizerExecutionFrame::Info::Info(const std::vector& nodes, const std::unordered_map& initialized_tensor_set, const std::filesystem::path& /* model_path */, const IExecutionProvider& execution_provider, - const std::function& is_sparse_initializer_func) + const std::function& is_sparse_initializer_func, + const logging::Logger& logger) : execution_provider_(execution_provider), - is_sparse_initializer_func_(is_sparse_initializer_func) { + is_sparse_initializer_func_(is_sparse_initializer_func), + logger_(logger) { allocator_ptr_ = std::make_shared(); ORT_ENFORCE(allocator_ptr_, "Failed to get allocator for optimizer"); @@ -117,7 +121,7 @@ OptimizerExecutionFrame::Info::Info(const std::vector& nodes, Status OptimizerExecutionFrame::Info::TryFindKernel(const Node* node, const KernelCreateInfo** out) const { std::shared_ptr kernel_registry = execution_provider_.GetKernelRegistry(); const OpSchemaKernelTypeStrResolver kernel_type_str_resolver{}; - return kernel_registry->TryFindKernel(*node, execution_provider_.Type(), kernel_type_str_resolver, out); + return kernel_registry->TryFindKernel(*node, execution_provider_.Type(), kernel_type_str_resolver, logger_, out); } static Status TryCreateKernel(const Node& node, @@ -128,10 +132,11 @@ static Status TryCreateKernel(const Node& node, FuncManager& funcs_mgr, const DataTransferManager& data_transfer_mgr, const ConfigOptions& config_options, + const logging::Logger& logger, /*out*/ std::unique_ptr& op_kernel) { const OpSchemaKernelTypeStrResolver kernel_type_str_resolver{}; const KernelCreateInfo* kernel_create_info = nullptr; - ORT_RETURN_IF_ERROR(kernel_registry.TryFindKernel(node, execution_provider.Type(), kernel_type_str_resolver, + ORT_RETURN_IF_ERROR(kernel_registry.TryFindKernel(node, execution_provider.Type(), kernel_type_str_resolver, logger, &kernel_create_info)); static const AllocatorMap dummy_allocators; @@ -154,7 +159,7 @@ OptimizerExecutionFrame::Info::CreateKernel(const Node* node, const ConfigOption std::shared_ptr kernel_registry = execution_provider_.GetKernelRegistry(); FuncManager func; auto status = TryCreateKernel(*node, *kernel_registry, execution_provider_, initializers_, - ort_value_name_idx_map_, func, 
data_transfer_mgr_, config_options, + ort_value_name_idx_map_, func, data_transfer_mgr_, config_options, logger_, op_kernel); // Kernel found in the CPU kernel registry diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.h b/onnxruntime/core/optimizer/optimizer_execution_frame.h index b0f7f461661b5..24a23312feba9 100644 --- a/onnxruntime/core/optimizer/optimizer_execution_frame.h +++ b/onnxruntime/core/optimizer/optimizer_execution_frame.h @@ -27,13 +27,15 @@ class OptimizerExecutionFrame final : public IExecutionFrame { const InitializedTensorSet& initialized_tensor_set, const std::filesystem::path& model_path, const IExecutionProvider& execution_provider, - const std::function& is_sparse_initializer_func); + const std::function& is_sparse_initializer_func, + const logging::Logger& logger); Info(const std::vector& nodes, const std::unordered_map& initialized_tensor_set, const std::filesystem::path& model_path, const IExecutionProvider& execution_provider, - const std::function& is_sparse_initializer_func); + const std::function& is_sparse_initializer_func, + const logging::Logger& logger); ~Info() = default; @@ -76,6 +78,7 @@ class OptimizerExecutionFrame final : public IExecutionFrame { std::unique_ptr node_index_info_; const IExecutionProvider& execution_provider_; const std::function& is_sparse_initializer_func_; + const logging::Logger& logger_; ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Info); }; diff --git a/onnxruntime/core/optimizer/qdq_transformer/avx2_weight_s8_to_u8.cc b/onnxruntime/core/optimizer/qdq_transformer/avx2_weight_s8_to_u8.cc index 18e462c04dff3..5538aa54801cc 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/avx2_weight_s8_to_u8.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/avx2_weight_s8_to_u8.cc @@ -36,7 +36,7 @@ static inline bool MatchesOpSinceVersion( return std::find(versions.begin(), versions.end(), node.SinceVersion()) != versions.end(); } -static bool TryConvertDynamicQuantizeLSTM(Node& op_node, Graph& graph) { +static bool TryConvertDynamicQuantizeLSTM(Node& op_node, Graph& graph, const logging::Logger& logger) { constexpr size_t w_idx = 1; constexpr size_t w_zp_idx = 9; constexpr size_t r_idx = 2; @@ -60,7 +60,7 @@ static bool TryConvertDynamicQuantizeLSTM(Node& op_node, Graph& graph) { if (!graph_utils::NodeArgIsConstant(graph, *input_defs[r_idx]) || !graph.GetInitializedTensor(input_defs[r_idx]->Name(), r_tensor_proto) || r_tensor_proto->data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) { - LOGS_DEFAULT(WARNING) << "Unable transforming DynamicQuantizeLSTM operator," + LOGS(logger, WARNING) << "Unable transforming DynamicQuantizeLSTM operator," << " cannot locate recurrence tensor of const int8 type," << " int8 overflow might impact precision !"; return false; @@ -86,7 +86,7 @@ static bool TryConvertDynamicQuantizeLSTM(Node& op_node, Graph& graph) { if (!graph_utils::NodeArgIsConstant(graph, *input_defs[r_zp_idx]) || !graph.GetInitializedTensor(input_defs[r_zp_idx]->Name(), r_zp_tensor_proto) || r_zp_tensor_proto->data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) { - LOGS_DEFAULT(WARNING) << "Unable transforming DynamicQuantizeLSTM operator," + LOGS(logger, WARNING) << "Unable transforming DynamicQuantizeLSTM operator," << " unable to locate recurrence tensor or its zero point value," << " int8 overflow might impact precision !"; return false; @@ -171,7 +171,7 @@ Status Avx2WeightS8ToU8Transformer::ApplyImpl(Graph& graph, bool& modified, int if (graph_utils::IsSupportedOptypeVersionAndDomain( op_node, 
"DynamicQuantizeLSTM", {1}, kMSDomain)) { // This one has two set of quantized arguments - modified |= TryConvertDynamicQuantizeLSTM(op_node, graph); + modified |= TryConvertDynamicQuantizeLSTM(op_node, graph, logger); continue; // go on to next operator node } diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc index d2240b5d50194..81305f7effa16 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc @@ -291,7 +291,8 @@ SelectorManager::SelectorManager() { InitializeSelectorsMap(); } -std::vector SelectorManager::GetQDQSelections(const GraphViewer& graph_viewer) const { +std::vector SelectorManager::GetQDQSelections(const GraphViewer& graph_viewer, + const logging::Logger& logger) const { std::vector qdq_selections; for (auto index : graph_viewer.GetNodesInTopologicalOrder()) { const auto* node = graph_viewer.GetNode(index); @@ -313,7 +314,7 @@ std::vector SelectorManager::GetQDQSelections(const GraphViewer& grap const auto& versions = op_versions_and_selector.op_versions_map.find(node->OpType())->second; if (!versions.empty()) { if (std::find(versions.cbegin(), versions.cend(), node->SinceVersion()) == versions.cend()) { - LOGS_DEFAULT(VERBOSE) << "Op version is not supported for" << node->OpType(); + LOGS(logger, VERBOSE) << "Op version is not supported for" << node->OpType(); continue; } } @@ -329,7 +330,7 @@ std::vector SelectorManager::GetQDQSelections(const GraphViewer& grap } std::pair>, std::unordered_map> -GetAllNodeUnits(const GraphViewer& graph_viewer) { +GetAllNodeUnits(const GraphViewer& graph_viewer, const logging::Logger& logger) { std::vector> node_unit_holder; std::unordered_map node_unit_map; @@ -342,7 +343,7 @@ GetAllNodeUnits(const GraphViewer& graph_viewer) { // Get QDQ NodeUnits first QDQ::SelectorManager selector_mgr; - const auto qdq_selections = selector_mgr.GetQDQSelections(graph_viewer); + const auto qdq_selections = selector_mgr.GetQDQSelections(graph_viewer, logger); for (const auto& qdq_selection : qdq_selections) { auto qdq_unit = std::make_unique(graph_viewer, qdq_selection); diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.h index f388206551172..ccc1844e3e985 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.h +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.h @@ -15,7 +15,9 @@ #endif namespace onnxruntime { - +namespace logging { +class Logger; +} class GraphViewer; class Node; @@ -65,7 +67,7 @@ class SelectorManager { // Methods that finds and returns a vector of QDQ::NodeGroup in a given graph // Can be used in QDQ support in different EPs - std::vector GetQDQSelections(const GraphViewer& graph_viewer) const; + std::vector GetQDQSelections(const GraphViewer& graph_viewer, const logging::Logger& logger) const; private: Selectors qdq_selectors_; @@ -88,7 +90,7 @@ class SelectorManager { // We currently have a bit of a mess with generic things like this to get all the node units being in the optimizer // library whereas it should be able to be used by an EP with no dependency on optimizers. 
std::pair>, std::unordered_map> -GetAllNodeUnits(const GraphViewer& graph_viewer); +GetAllNodeUnits(const GraphViewer& graph_viewer, const logging::Logger& logger); } // namespace QDQ } // namespace onnxruntime diff --git a/onnxruntime/core/optimizer/transformer_memcpy.cc b/onnxruntime/core/optimizer/transformer_memcpy.cc index 6a5a85ce0ff31..8c0136c495403 100644 --- a/onnxruntime/core/optimizer/transformer_memcpy.cc +++ b/onnxruntime/core/optimizer/transformer_memcpy.cc @@ -17,13 +17,22 @@ class TransformerMemcpyImpl { TransformerMemcpyImpl(onnxruntime::Graph& graph, const std::string& provider) : graph_(graph), provider_(provider) {} - bool ModifyGraph(const KernelRegistryManager& schema_registries, const logging::Logger& logger, int& copy_node_counter); + bool ModifyGraph(const KernelRegistryManager& schema_registries, + const logging::Logger& logger, + int& copy_node_counter); private: - void ProcessDefs(onnxruntime::Node& node, const KernelRegistryManager& kernel_registries, InitializedTensorSet& initializers_consumed); - void BuildDefsMapping(const onnxruntime::NodeArg* arg, const KernelRegistryManager& kernel_registries); + void ProcessDefs(onnxruntime::Node& node, + const KernelRegistryManager& kernel_registries, + InitializedTensorSet& initializers_consumed, + const logging::Logger& logger); + void BuildDefsMapping(const onnxruntime::NodeArg* arg, + const KernelRegistryManager& kernel_registries, + const logging::Logger& logger); void AddCopyNode(onnxruntime::NodeArg* arg, bool is_input, const logging::Logger& logger); - bool ProcessInitializers(const KernelRegistryManager& kernel_registries, const InitializedTensorSet& initializers_consumed); + bool ProcessInitializers(const KernelRegistryManager& kernel_registries, + const InitializedTensorSet& initializers_consumed, + const logging::Logger& logger); private: ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TransformerMemcpyImpl); @@ -130,21 +139,21 @@ bool TransformerMemcpyImpl::ModifyGraph(const KernelRegistryManager& kernel_regi // find defs that require copy for (auto& node : graph_.Nodes()) { // as we process the defs, collect all the initializers consumed at the current graph level - ProcessDefs(node, kernel_registries, initializers_consumed); + ProcessDefs(node, kernel_registries, initializers_consumed, logger); } // for initializers shared by different providers, create dups - if (ProcessInitializers(kernel_registries, initializers_consumed)) + if (ProcessInitializers(kernel_registries, initializers_consumed, logger)) modified = true; for (auto arg : graph_.GetInputs()) - BuildDefsMapping(arg, kernel_registries); + BuildDefsMapping(arg, kernel_registries, logger); for (auto arg : non_provider_input_defs_) - BuildDefsMapping(arg, kernel_registries); + BuildDefsMapping(arg, kernel_registries, logger); for (auto arg : non_provider_output_defs_) - BuildDefsMapping(arg, kernel_registries); + BuildDefsMapping(arg, kernel_registries, logger); for (auto arg : graph_.GetInputs()) // For inputs we need to create a copy node only when the input is connected to both provider @@ -202,8 +211,10 @@ bool TransformerMemcpyImpl::ModifyGraph(const KernelRegistryManager& kernel_regi return modified; } -void TransformerMemcpyImpl::ProcessDefs(onnxruntime::Node& node, const KernelRegistryManager& kernel_registries, - InitializedTensorSet& initializers_consumed) { +void TransformerMemcpyImpl::ProcessDefs(onnxruntime::Node& node, + const KernelRegistryManager& kernel_registries, + InitializedTensorSet& initializers_consumed, + const logging::Logger& 
logger) { auto node_provider_type = node.GetExecutionProviderType(); if ((node_provider_type == provider_) || (node_provider_type == kCudaExecutionProvider && kTensorrtExecutionProvider == provider_) || @@ -211,7 +222,7 @@ void TransformerMemcpyImpl::ProcessDefs(onnxruntime::Node& node, const KernelReg provider_nodes_.insert(&node); // note KernelCreateInfo might be nullptr for custom kernel const KernelCreateInfo* kci = nullptr; - ORT_IGNORE_RETURN_VALUE(kernel_registries.SearchKernelRegistry(node, &kci)); + ORT_IGNORE_RETURN_VALUE(kernel_registries.SearchKernelRegistry(node, logger, &kci)); bool is_implicit_input = false; auto process_inputs = @@ -278,7 +289,9 @@ void TransformerMemcpyImpl::ProcessDefs(onnxruntime::Node& node, const KernelReg } // for non_provider defs, collect the nodes that expect it is provider tensor as input/output. -void TransformerMemcpyImpl::BuildDefsMapping(const onnxruntime::NodeArg* arg, const KernelRegistryManager& kernel_registries) { +void TransformerMemcpyImpl::BuildDefsMapping(const onnxruntime::NodeArg* arg, + const KernelRegistryManager& kernel_registries, + const logging::Logger& logger) { for (auto& it : graph_.Nodes()) { if (it.OpType() == "MemcpyFromHost" || it.OpType() == "MemcpyToHost") continue; auto input_it = @@ -296,7 +309,7 @@ void TransformerMemcpyImpl::BuildDefsMapping(const onnxruntime::NodeArg* arg, co (node_provider_type == kCudaExecutionProvider && kTensorrtExecutionProvider == provider_) || (node_provider_type == kRocmExecutionProvider && kMIGraphXExecutionProvider == provider_)) { const KernelCreateInfo* kci = nullptr; - ORT_IGNORE_RETURN_VALUE(kernel_registries.SearchKernelRegistry(it, &kci)); + ORT_IGNORE_RETURN_VALUE(kernel_registries.SearchKernelRegistry(it, logger, &kci)); if (arg_input_index != -1) { if (!kci || !utils::IsInputOnCpu(it, kci, arg_input_index)) provider_input_nodes_[arg].insert(&it); } @@ -351,7 +364,9 @@ static const onnxruntime::NodeArg* FindNodeArg(const NodeArgSetType& def_set, co // We duplicate any initializer that is used by both provider nodes and non-provider nodes // to ensure that provider nodes and non-provider nodes don't share initializers, as they // need to stay in different memory locations. 
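// Illustrative sketch, not part of the patch: unlike PlannerImpl, which stores logger_ as a
// member, TransformerMemcpyImpl keeps receiving the logger through ModifyGraph and forwards
// it to each private helper. A self-contained toy of that parameter-threading style; Impl,
// Logger, and the step names are stand-ins, not ORT types.
#include <iostream>

struct Logger {
  void Verbose(const char* msg) const { std::cout << msg << '\n'; }
};

class Impl {
 public:
  bool ModifyGraph(const Logger& logger) {  // the public entry point holds no logger state
    ProcessDefs(logger);                    // each helper takes the logger as a trailing parameter
    ProcessInitializers(logger);
    return true;
  }

 private:
  void ProcessDefs(const Logger& logger) { logger.Verbose("processing defs"); }
  void ProcessInitializers(const Logger& logger) { logger.Verbose("processing initializers"); }
};

int main() {
  Logger logger;
  Impl impl;
  impl.ModifyGraph(logger);
  return 0;
}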
-bool TransformerMemcpyImpl::ProcessInitializers(const KernelRegistryManager& kernel_registries, const InitializedTensorSet& initializers_consumed) { +bool TransformerMemcpyImpl::ProcessInitializers(const KernelRegistryManager& kernel_registries, + const InitializedTensorSet& initializers_consumed, + const logging::Logger& logger) { std::map replacements; for (const auto& pair : initializers_consumed) { const auto& name = pair.first; @@ -383,7 +398,7 @@ bool TransformerMemcpyImpl::ProcessInitializers(const KernelRegistryManager& ker auto dup_replacements = replacements; const KernelCreateInfo* kci = nullptr; - auto status = kernel_registries.SearchKernelRegistry(*p_node, &kci); + auto status = kernel_registries.SearchKernelRegistry(*p_node, logger, &kci); ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); if (kci == nullptr) continue; if (kci->kernel_def == nullptr) continue; diff --git a/onnxruntime/core/providers/cann/cann_execution_provider.cc b/onnxruntime/core/providers/cann/cann_execution_provider.cc index a799ed743ef52..f954baf3eabae 100644 --- a/onnxruntime/core/providers/cann/cann_execution_provider.cc +++ b/onnxruntime/core/providers/cann/cann_execution_provider.cc @@ -1288,15 +1288,15 @@ CANNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewe const KernelCreateInfo* cann_kernel_def = kernel_lookup.LookUpKernel(node); if (cann_kernel_def == nullptr) { - LOGS_DEFAULT(INFO) << "CANN kernel not found in registries for Op type: " << node.OpType() - << " node name: " << node.Name(); + LOGS(*GetLogger(), INFO) << "CANN kernel not found in registries for Op type: " << node.OpType() + << " node name: " << node.Name(); continue; } candidates.push_back(node.Index()); } - auto cpu_nodes = GetCpuPreferredNodes(graph_viewer, kernel_lookup, candidates); + auto cpu_nodes = GetCpuPreferredNodes(graph_viewer, kernel_lookup, candidates, *GetLogger()); for (auto& node_index : candidates) { if (cpu_nodes.count(node_index) > 0) continue; diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index 8396e2629d2bf..d4013a7dc3d57 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -2693,7 +2693,7 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph, // For CUDA EP, exclude the subgraph that is preferred to be placed in CPU // These are usually shape related computation subgraphs // Following logic can be extended for other EPs - auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes); + auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes, logger); std::vector> result; for (auto& node_index : candidates) { if (cpu_nodes.count(node_index) > 0) diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionTransformer.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionTransformer.cpp index 35a2c451a49a5..9f95818501dac 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionTransformer.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionTransformer.cpp @@ -62,7 +62,8 @@ namespace Dml const auto kernel_type_str_resolver = onnxruntime::OpSchemaKernelTypeStrResolver{}; const auto kernel_lookup = onnxruntime::KernelLookup{provider_type, gsl::make_span(®istry, 1), - kernel_type_str_resolver}; + kernel_type_str_resolver, + logger}; std::vector> 
compiledPartitionInfos; std::vector additionalSplittingNodes; diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.cpp index 6318b0d5e2865..b9b90d6bc17bd 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeGraphFusionTransformer.cpp @@ -54,7 +54,8 @@ namespace Dml const auto kernelLookup = onnxruntime::KernelLookup( providerType, gsl::make_span(®istry, 1), - kernelTypeStrResolver); + kernelTypeStrResolver, + logger); onnxruntime::GraphViewer graphViewer(graph); const auto& nodeTopologyList = graphViewer.GetNodesInTopologicalOrder(); diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp index 228dfeb123175..826f48b5f7a68 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp @@ -95,7 +95,7 @@ namespace Dml const onnxruntime::IExecutionProvider::IKernelLookup& kernel_lookup) const { #ifdef ENABLE_GRAPH_COMPILATION - return m_impl->GetCapability(graph, kernel_lookup); + return m_impl->GetCapability(graph, kernel_lookup, *GetLogger()); #else return onnxruntime::IExecutionProvider::GetCapability(graph, kernel_lookup); #endif @@ -876,7 +876,8 @@ namespace Dml std::vector> ExecutionProviderImpl::GetCapability( const onnxruntime::GraphViewer& graph, - const onnxruntime::IExecutionProvider::IKernelLookup& kernel_lookup) const + const onnxruntime::IExecutionProvider::IKernelLookup& kernel_lookup, + const onnxruntime::logging::Logger& logger) const { uint32_t deviceDataTypeMask = GetSupportedDeviceDataTypeMask(); // Each bit corresponds to each DML_TENSOR_DATA_TYPE. 
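// Illustrative sketch, not part of the patch: inside GetCapability an execution provider now
// resolves its own logger (the CANN/JS/DML call sites above use *GetLogger()) and passes it as
// the new last argument of GetCpuPreferredNodes. "KeepProviderNodes" is a hypothetical helper,
// and the exact span element type is an assumption; only the extra logger argument is the point.
#include <vector>

#include "core/framework/execution_provider.h"
#include "core/framework/fallback_cpu_capability.h"

namespace onnxruntime {
void KeepProviderNodes(const GraphViewer& graph_viewer,
                       const IExecutionProvider::IKernelLookup& kernel_lookup,
                       gsl::span<const NodeIndex> tentative_nodes,  // element type assumed
                       const logging::Logger& logger,               // e.g. *GetLogger() in an EP
                       std::vector<NodeIndex>& provider_nodes) {
  const auto cpu_nodes = GetCpuPreferredNodes(graph_viewer, kernel_lookup, tentative_nodes, logger);
  for (NodeIndex idx : tentative_nodes) {
    if (cpu_nodes.count(idx) == 0) {
      provider_nodes.push_back(idx);  // same filtering the EPs above apply to their candidates
    }
  }
}
}  // namespace onnxruntime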
@@ -900,7 +901,7 @@ namespace Dml } // Get the list of nodes that should stay on the CPU - auto cpuPreferredNodes = GetCpuPreferredNodes(graph, kernel_lookup, tentativeNodes); + auto cpuPreferredNodes = GetCpuPreferredNodes(graph, kernel_lookup, tentativeNodes, logger); for (size_t nodeIndex : toplogicalOrder) { diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h index 32a5b9add35a0..e7d859c5764de 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.h @@ -88,7 +88,8 @@ namespace Dml std::vector> GetCapability( const onnxruntime::GraphViewer& graph, - const onnxruntime::IExecutionProvider::IKernelLookup& kernel_lookup + const onnxruntime::IExecutionProvider::IKernelLookup& kernel_lookup, + const onnxruntime::logging::Logger& logger ) const; uint32_t GetSupportedDeviceDataTypeMask() const; diff --git a/onnxruntime/core/providers/js/js_execution_provider.cc b/onnxruntime/core/providers/js/js_execution_provider.cc index 4cb40ec8bf5fd..c1a8b373bed84 100644 --- a/onnxruntime/core/providers/js/js_execution_provider.cc +++ b/onnxruntime/core/providers/js/js_execution_provider.cc @@ -818,7 +818,7 @@ std::vector> JsExecutionProvider::GetCapabili candidates.push_back(node.Index()); tenative_candidates.push_back(node.Index()); } - auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tenative_candidates); + auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tenative_candidates, *GetLogger()); std::vector> result; for (auto& node_index : candidates) { if (cpu_nodes.count(node_index) > 0) { diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index 12416ea0c121b..e4bee6f959a01 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -32,8 +32,16 @@ namespace nnapi { ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle, gsl::span nnapi_target_devices, - TargetDeviceOption target_device_option) - : nnapi_(nnapi_handle), graph_viewer_(graph_viewer), nnapi_model_{std::make_unique(nnapi_handle)}, shaper_{graph_viewer}, nnapi_target_devices_(nnapi_target_devices), target_device_option_(target_device_option), nnapi_effective_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)) { + TargetDeviceOption target_device_option, + const logging::Logger& logger) + : nnapi_(nnapi_handle), + graph_viewer_(graph_viewer), + nnapi_model_{std::make_unique(nnapi_handle)}, + shaper_{graph_viewer}, + nnapi_target_devices_(nnapi_target_devices), + target_device_option_(target_device_option), + nnapi_effective_feature_level_(GetNNAPIEffectiveFeatureLevel(nnapi_handle, nnapi_target_devices_)), + logger_(logger) { nnapi_model_->nnapi_effective_feature_level_ = nnapi_effective_feature_level_; } @@ -136,7 +144,7 @@ const NodeUnit& ModelBuilder::GetNodeUnit(const Node* node) const { } void ModelBuilder::PreprocessNodeUnits() { - std::tie(node_unit_holder_, node_unit_map_) = QDQ::GetAllNodeUnits(graph_viewer_); + std::tie(node_unit_holder_, node_unit_map_) = QDQ::GetAllNodeUnits(graph_viewer_, logger_); } // Help to get all quantized operators' input and the NodeUnit(s) using the input diff --git 
a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h index b2118150dd304..4db335afa98b0 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.h @@ -14,7 +14,9 @@ struct NnApi; namespace onnxruntime { - +namespace logging { +class Logger; +} class GraphViewer; enum class DataLayout; class NodeUnit; @@ -31,7 +33,8 @@ class ModelBuilder { using Shape = Shaper::Shape; ModelBuilder(const GraphViewer& graph_viewer, const NnApi& nnapi_handle, - gsl::span nnapi_target_devices, TargetDeviceOption target_device_option); + gsl::span nnapi_target_devices, TargetDeviceOption target_device_option, + const logging::Logger& logger); common::Status Compile(std::unique_ptr& model); @@ -173,6 +176,9 @@ class ModelBuilder { // <1,1> <1,2> <1,3> InlinedVector> operations_recorder_; #endif + + const logging::Logger& logger_; + // Convert the ONNX model to ANeuralNetworksModel common::Status Prepare(); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc index fca52396a190c..f92c9592742d5 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc @@ -81,6 +81,7 @@ NnapiExecutionProvider::~NnapiExecutionProvider() {} std::vector> NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer, const IKernelLookup& /*kernel_lookup*/) const { + const auto& logger = *GetLogger(); std::vector> result; // TODO: Task 812756: NNAPI EP, add support for subgraph (If and Loop operators) @@ -101,7 +102,7 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view return ORT_NNAPI_MAX_SUPPORTED_API_LEVEL; #endif }(); - LOGS_DEFAULT(VERBOSE) << "Effective NNAPI feature level: " << android_feature_level; + LOGS(logger, VERBOSE) << "Effective NNAPI feature level: " << android_feature_level; const nnapi::OpSupportCheckParams params{ android_feature_level, @@ -109,7 +110,7 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view }; if (params.android_feature_level < ORT_NNAPI_MIN_API_LEVEL) { - LOGS_DEFAULT(WARNING) << "All ops will fallback to CPU EP, because system NNAPI feature level [" + LOGS(logger, WARNING) << "All ops will fallback to CPU EP, because system NNAPI feature level [" << params.android_feature_level << "] is lower than minimal supported NNAPI API feature level [" << ORT_NNAPI_MIN_API_LEVEL @@ -121,7 +122,7 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); // This holds the result of whether a NodeUnit is supported or not, // to prevent nodes in a NodeUnit to be checked for multiple times @@ -150,7 +151,7 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view node_unit_supported_result[node_unit] = supported; } - LOGS_DEFAULT(VERBOSE) << "Node supported: [" << supported + LOGS(logger, VERBOSE) << "Node supported: [" << supported << "] Operator type: [" << node.OpType() << "] index: [" << node.Index() << "] name: [" << node.Name() @@ 
-224,9 +225,9 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view // If the graph is partitioned in multiple subgraphs, and this may impact performance, // we want to give users a summary message at warning level. if (num_of_partitions > 1) { - LOGS_DEFAULT(WARNING) << summary_msg; + LOGS(logger, WARNING) << summary_msg; } else { - LOGS_DEFAULT(INFO) << summary_msg; + LOGS(logger, INFO) << summary_msg; } return result; @@ -273,11 +274,13 @@ static Status GetOutputBuffer(Ort::KernelContext& context, common::Status NnapiExecutionProvider::Compile(const std::vector& fused_nodes_and_graphs, std::vector& node_compute_funcs) { using namespace android::nn::wrapper; + const auto& logger = *GetLogger(); + for (const auto& fused_node_and_graph : fused_nodes_and_graphs) { Node& fused_node = fused_node_and_graph.fused_node; const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph); - nnapi::ModelBuilder builder(graph_viewer, *nnapi_handle_, nnapi_target_devices_, target_device_option_); + nnapi::ModelBuilder builder(graph_viewer, *nnapi_handle_, nnapi_target_devices_, target_device_option_, logger); builder.SetUseNCHW(nnapi_flags_ & NNAPI_FLAG_USE_NCHW); builder.SetUseFp16(nnapi_flags_ & NNAPI_FLAG_USE_FP16); diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc index f1df1abf4c49a..decfe91c598be 100644 --- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc +++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc @@ -687,7 +687,7 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph, // Get all the NodeUnits in the graph_viewer std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(&src_graph); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(&src_graph, logger); std::unordered_set seen_node_units; const auto& node_indices = src_graph.GetNodesInTopologicalOrder(); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model.cc b/onnxruntime/core/providers/qnn/builder/qnn_model.cc index 88fa6429fc01e..75973c7031d62 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model.cc @@ -104,7 +104,7 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer, // valid throughout the lifetime of the ModelBuilder std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); // This name must be same with the EPContext node name const auto& graph_name = fused_node.Name(); diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 3bb069196e31c..060bbd4f79bf2 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -718,7 +718,7 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); // remove is_qnn_ctx_model related code const auto supported_nodes 
= GetSupportedNodes(graph_viewer, node_unit_map, diff --git a/onnxruntime/core/providers/rocm/rocm_execution_provider.cc b/onnxruntime/core/providers/rocm/rocm_execution_provider.cc index 75b8ac7e134f3..0a427b146dcaa 100644 --- a/onnxruntime/core/providers/rocm/rocm_execution_provider.cc +++ b/onnxruntime/core/providers/rocm/rocm_execution_provider.cc @@ -2493,7 +2493,7 @@ ROCMExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph, // For ROCM EP, exclude the subgraph that is preferred to be placed in CPU // These are usually shape related computation subgraphs // Following logic can be extended for other EPs - auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes); + auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes, logger); std::vector> result; for (auto& node_index : candidates) { if (cpu_nodes.count(node_index) > 0) diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index b84825236a453..45f81ed22b7f7 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -294,7 +294,8 @@ std::unique_ptr CreateGPUDataTransfer(); std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewer& graph, const IExecutionProvider::IKernelLookup& kernel_lookup, - gsl::span tentative_nodes); + gsl::span tentative_nodes, + const logging::Logger& logger); std::string GetEnvironmentVar(const std::string& var_name); @@ -371,8 +372,8 @@ constexpr ONNXTensorElementDataType GetONNXTensorElementDataType() { namespace QDQ { inline std::pair>, std::unordered_map> -GetAllNodeUnits(const GraphViewer* graph_viewer) { - return g_host->QDQ__GetAllNodeUnits(graph_viewer); +GetAllNodeUnits(const GraphViewer* graph_viewer, const logging::Logger& logger) { + return g_host->QDQ__GetAllNodeUnits(graph_viewer, logger); } } // namespace QDQ diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index d3b12f9728135..aa8c367d25d51 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -369,8 +369,9 @@ std::string GetEnvironmentVar(const std::string& var_name) { std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewer& graph, const IExecutionProvider::IKernelLookup& kernel_lookup, - gsl::span tentative_nodes) { - return g_host->GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes); + gsl::span tentative_nodes, + const logging::Logger& logger) { + return g_host->GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes, logger); } namespace profiling { diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index f9f2bb69a9d1a..7ab93d56cfe26 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -202,7 +202,8 @@ struct ProviderHost { virtual std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewer& graph, const IExecutionProvider::IKernelLookup& kernel_lookup, - gsl::span tentative_nodes) = 0; + gsl::span tentative_nodes, + const logging::Logger& logger) = 0; virtual Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ bool* p_data, 
size_t expected_size) = 0; virtual Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ float* p_data, size_t expected_size) = 0; @@ -890,7 +891,7 @@ struct ProviderHost { virtual std::unique_ptr NodeUnit__OutputEdgesEnd(const NodeUnit* p) = 0; virtual std::pair>, std::unordered_map> - QDQ__GetAllNodeUnits(const GraphViewer* graph_viewer) = 0; + QDQ__GetAllNodeUnits(const GraphViewer* graph_viewer, const logging::Logger& logger) = 0; // Model virtual std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, diff --git a/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.cc b/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.cc index bbf8255ac2940..db8a87d9eaf24 100644 --- a/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.cc +++ b/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.cc @@ -34,7 +34,8 @@ namespace onnxruntime { namespace vsi { namespace npu { -GraphEP::GraphEP(const onnxruntime::GraphViewer& graph_viewer) : graph_viewer_(graph_viewer) { +GraphEP::GraphEP(const onnxruntime::GraphViewer& graph_viewer, const logging::Logger& logger) + : graph_viewer_(graph_viewer), logger_(logger) { Prepare(); context_ = tim::vx::Context::Create(); graph_ = context_->CreateGraph(); @@ -42,7 +43,7 @@ GraphEP::GraphEP(const onnxruntime::GraphViewer& graph_viewer) : graph_viewer_(g } bool GraphEP::Prepare() { - std::tie(node_unit_holder_, node_unit_map_) = QDQ::GetAllNodeUnits(graph_viewer_); + std::tie(node_unit_holder_, node_unit_map_) = QDQ::GetAllNodeUnits(graph_viewer_, logger_); for (const auto& node_unit : node_unit_holder_) { auto quant_op_type = util::GetQuantizedOpType(*node_unit); diff --git a/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.h b/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.h index 49344770d060e..5bb332fad0177 100644 --- a/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.h +++ b/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.h @@ -51,7 +51,7 @@ struct NodeIOInfo { class GraphEP { public: - explicit GraphEP(const GraphViewer& graph_viewer); + explicit GraphEP(const GraphViewer& graph_viewer, const logging::Logger& logger); ~GraphEP() {} bool Prepare(); @@ -104,6 +104,7 @@ class GraphEP { // In the form of {input_name, [NodeUnit(s) using the input]} std::unordered_map> all_quantized_op_inputs_; const GraphViewer& graph_viewer_; + const logging::Logger& logger_; // Holder for the NodeUnits in the graph, this will guarantee the NodeUnits is // valid throughout the lifetime of the ModelBuilder diff --git a/onnxruntime/core/providers/vsinpu/vsinpu_execution_provider.cc b/onnxruntime/core/providers/vsinpu/vsinpu_execution_provider.cc index 669c702544de8..7da7cc6cb63ba 100644 --- a/onnxruntime/core/providers/vsinpu/vsinpu_execution_provider.cc +++ b/onnxruntime/core/providers/vsinpu/vsinpu_execution_provider.cc @@ -62,6 +62,7 @@ VSINPUExecutionProvider::~VSINPUExecutionProvider() {} std::vector> VSINPUExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer, const IKernelLookup& /*kernel_lookup*/) const { + const auto& logger = *GetLogger(); std::vector> result; if (graph_viewer.IsSubgraph()) { @@ -82,7 +83,7 @@ VSINPUExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie // Get all the NodeUnits in the graph_viewer std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer); + std::tie(node_unit_holder, node_unit_map) = 
QDQ::GetAllNodeUnits(graph_viewer, logger); // This holds the result of whether a NodeUnit is supported or not, // to prevent nodes in a NodeUnit to be checked for multiple times @@ -174,7 +175,8 @@ VSINPUExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie } Status ComputeStateFunc(vsi::npu::GraphEP* graph_ep, - OrtKernelContext* context) { + OrtKernelContext* context, + const logging::Logger& logger) { Ort::KernelContext ctx(context); size_t num_in = ctx.GetInputCount(); const size_t num_inputs = graph_ep->GetGraphInputs().size(); @@ -192,7 +194,7 @@ Status ComputeStateFunc(vsi::npu::GraphEP* graph_ep, } if (!graph_ep->GetGraph()->Run()) { - LOGS_DEFAULT(ERROR) << "Failed to run graph."; + LOGS(logger, ERROR) << "Failed to run graph."; } for (size_t i = 0; i < ctx.GetOutputCount(); i++) { auto timvx_tensor = graph_ep->GetGraphOutputs()[i]->tensor; @@ -207,12 +209,14 @@ Status ComputeStateFunc(vsi::npu::GraphEP* graph_ep, Status VSINPUExecutionProvider::Compile(const std::vector& fused_nodes_and_graphs, std::vector& node_compute_funcs) { + const auto& logger = *GetLogger(); + for (const auto& fused_node_graph : fused_nodes_and_graphs) { const GraphViewer& graph_viewer = fused_node_graph.filtered_graph; - std::shared_ptr graph_ep = std::make_shared(graph_viewer); + std::shared_ptr graph_ep = std::make_shared(graph_viewer, logger); for (auto tensor : graph_viewer.GetInputsIncludingInitializers()) { - LOGS_DEFAULT(VERBOSE) << "subgraph input init:" << vsi::npu::util::PrintNode(*tensor) << "#" + LOGS(logger, VERBOSE) << "subgraph input init:" << vsi::npu::util::PrintNode(*tensor) << "#" << graph_viewer.IsInitializedTensor(tensor->Name()); auto input = std::make_shared(); input->name = tensor->Name(); @@ -220,7 +224,7 @@ Status VSINPUExecutionProvider::Compile(const std::vector& fu graph_ep->GetGraphInputs().push_back(input); } for (auto tensor : graph_viewer.GetOutputs()) { - LOGS_DEFAULT(VERBOSE) << "subgraph output:" << vsi::npu::util::PrintNode(*tensor); + LOGS(logger, VERBOSE) << "subgraph output:" << vsi::npu::util::PrintNode(*tensor); auto output = std::make_shared(); output->name = tensor->Name(); output->is_initializer = false; @@ -236,16 +240,16 @@ Status VSINPUExecutionProvider::Compile(const std::vector& fu if (node != &node_unit.GetNode()) { continue; } - LOGS_DEFAULT(VERBOSE) << "Adding node: [" << node->OpType() << "]"; + LOGS(logger, VERBOSE) << "Adding node: [" << node->OpType() << "]"; vsi::npu::SupportedBuiltinOps().at(node->OpType())->BuildOp(graph_ep.get(), graph_viewer, node_unit); } - LOGS_DEFAULT(INFO) << "Verifying graph"; + LOGS(logger, INFO) << "Verifying graph"; graph_ep->GetCompiled() = graph_ep->GetGraph()->Compile(); if (!graph_ep->GetCompiled()) { - LOGS_DEFAULT(ERROR) << "Failed to verify graph."; + LOGS(logger, ERROR) << "Failed to verify graph."; } else { - LOGS_DEFAULT(INFO) << "Graph has been verified successfully."; + LOGS(logger, INFO) << "Graph has been verified successfully."; } NodeComputeInfo compute_info; @@ -259,7 +263,7 @@ Status VSINPUExecutionProvider::Compile(const std::vector& fu [graph_ep, this](FunctionState /*state*/, const OrtApi* /* api */, OrtKernelContext* context) { std::lock_guard lock(this->GetMutex()); - Status res = ComputeStateFunc(graph_ep.get(), context); + Status res = ComputeStateFunc(graph_ep.get(), context, *GetLogger()); return res; }; diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc index 
90b6862758bd7..66209adf6f1a9 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc @@ -798,7 +798,8 @@ std::vector> WebGpuExecutionProvider::GetCapa candidates.push_back(node.Index()); tenative_candidates.push_back(node.Index()); } - auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tenative_candidates); + + auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tenative_candidates, *GetLogger()); std::vector> result; for (auto& node_index : candidates) { if (cpu_nodes.count(node_index) > 0) { diff --git a/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc b/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc index 12e567e7080b3..ee4e7be0f1f49 100644 --- a/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc +++ b/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc @@ -258,6 +258,7 @@ static void AddComputeCapabilityForEachNodeInNodeUnit( std::vector> XnnpackExecutionProvider::GetCapability( const onnxruntime::GraphViewer& graph, const IKernelLookup& /*kernel_lookup*/) const { + const auto& logger = *GetLogger(); std::vector> capabilities; std::shared_ptr registry = GetKernelRegistry(); @@ -268,7 +269,7 @@ std::vector> XnnpackExecutionProvider::GetCap // Get all the NodeUnits in the GraphViewer so we can check if something is in a QDQ node group std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph, logger); // This holds the result of whether a NodeUnit is supported or not, // to prevent nodes in a NodeUnit being checked for multiple times diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 2ff9fa525fa3b..a60ee500a9898 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -1644,7 +1644,7 @@ Status ApplyOrtFormatModelRuntimeOptimizations( level <= static_cast(session_options.graph_optimization_level); ++level) { const auto transformers = optimizer_utils::GenerateTransformersForMinimalBuild( - static_cast(level), session_options, SatRuntimeOptimizationLoadContext{}, cpu_ep, + static_cast(level), session_options, SatRuntimeOptimizationLoadContext{}, cpu_ep, logger, optimizers_to_disable, intra_op_thread_pool, p_buffered_tensors); for (const auto& transformer : transformers) { @@ -1840,7 +1840,8 @@ common::Status InferenceSession::Initialize() { ORT_RETURN_IF_ERROR_SESSIONID_(AddPredefinedTransformers(graph_transformer_mgr_, session_options_.graph_optimization_level, minimal_build_optimization_handling, - record_runtime_optimization_produced_op_schema)); + record_runtime_optimization_produced_op_schema, + *session_logger_)); #ifdef USE_DML const IExecutionProvider* dmlExecutionProvider = execution_providers_.Get(kDmlExecutionProvider); @@ -2112,7 +2113,7 @@ common::Status InferenceSession::Initialize() { std::vector tuning_results; bool found_tuning_results = false; ORT_RETURN_IF_ERROR_SESSIONID_(inference_session_utils::ParseTuningResultsFromModelMetadata( - model_metadata_, tuning_results, found_tuning_results)); + model_metadata_, tuning_results, found_tuning_results, *session_logger_)); if (found_tuning_results) { ORT_RETURN_IF_ERROR_SESSIONID_(SetTuningResults(tuning_results, /*error_on_invalid*/ false, /*auto_enable*/ true)); } @@ -3233,7 +3234,8 @@ 
common::Status InferenceSession::AddPredefinedTransformers( GraphTransformerManager& transformer_manager, TransformerLevel graph_optimization_level, MinimalBuildOptimizationHandling minimal_build_optimization_handling, - RecordRuntimeOptimizationProducedNodeOpSchemaFn record_runtime_optimization_produced_op_schema_fn) const { + RecordRuntimeOptimizationProducedNodeOpSchemaFn record_runtime_optimization_produced_op_schema_fn, + const logging::Logger& logger) const { const auto& cpu_ep = *execution_providers_.Get(onnxruntime::kCpuExecutionProvider); for (int i = static_cast(TransformerLevel::Level1); i <= static_cast(TransformerLevel::MaxLevel); i++) { TransformerLevel level = static_cast(i); @@ -3245,7 +3247,7 @@ common::Status InferenceSession::AddPredefinedTransformers( minimal_build_optimization_handling == MinimalBuildOptimizationHandling::ApplyFullBuildOptimizations; if (use_full_build_optimizations) { - return optimizer_utils::GenerateTransformers(level, session_options_, cpu_ep, + return optimizer_utils::GenerateTransformers(level, session_options_, cpu_ep, logger, optimizers_to_disable_, GetIntraOpThreadPoolToUse(), session_state_->GetMutableBufferedTensors()); @@ -3257,6 +3259,7 @@ common::Status InferenceSession::AddPredefinedTransformers( record_runtime_optimization_produced_op_schema_fn}} : SatApplyContextVariant{SatDirectApplicationContext{}}; return optimizer_utils::GenerateTransformersForMinimalBuild(level, session_options_, sat_context, cpu_ep, + logger, optimizers_to_disable_, GetIntraOpThreadPoolToUse(), session_state_->GetMutableBufferedTensors()); diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index 0675f64848fd0..e28ff75345785 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -690,8 +690,9 @@ class InferenceSession { * If we encounter an invalid request, we return an error * back to the user. 
*/ - [[nodiscard]] common::Status ValidateAndParseShrinkArenaString(const std::string& ort_device_list, - /*out*/ InlinedVector& arenas_to_shrink) const; + [[nodiscard]] common::Status ValidateAndParseShrinkArenaString( + const std::string& ort_device_list, + /*out*/ InlinedVector& arenas_to_shrink) const; /* * Performs the shrinkage of arenas requested to be shrunk by the user @@ -708,7 +709,8 @@ class InferenceSession { GraphTransformerManager& transformer_manager, TransformerLevel graph_optimization_level, MinimalBuildOptimizationHandling minimal_build_optimization_handling, - RecordRuntimeOptimizationProducedNodeOpSchemaFn record_runtime_optimization_produced_op_schema_fn) const; + RecordRuntimeOptimizationProducedNodeOpSchemaFn record_runtime_optimization_produced_op_schema_fn, + const logging::Logger& logger) const; common::Status TransformGraph(onnxruntime::Graph& graph, bool saving_model_in_ort_format); diff --git a/onnxruntime/core/session/inference_session_utils.cc b/onnxruntime/core/session/inference_session_utils.cc index 3436eebda3819..8b9de0c604441 100644 --- a/onnxruntime/core/session/inference_session_utils.cc +++ b/onnxruntime/core/session/inference_session_utils.cc @@ -236,7 +236,8 @@ Status JsonConfigParser::ParseRunOptionsFromModelProto(RunOptions& /*run_options Status ParseTuningResultsFromModelMetadata(const onnxruntime::ModelMetadata& metadata, std::vector& results, - bool& key_found) { + bool& key_found, + const logging::Logger& logger) { results.clear(); key_found = false; auto it = metadata.custom_metadata_map.find(kTuningResultsKeys); @@ -245,7 +246,7 @@ Status ParseTuningResultsFromModelMetadata(const onnxruntime::ModelMetadata& met } key_found = true; - LOGS_DEFAULT(INFO) << "Found tuning results in the model file to be used while loading the model"; + LOGS(logger, INFO) << "Found tuning results in the model file to be used while loading the model"; Status status; ORT_TRY { diff --git a/onnxruntime/core/session/inference_session_utils.h b/onnxruntime/core/session/inference_session_utils.h index a0bcdb9013bf0..f297d928f8a0d 100644 --- a/onnxruntime/core/session/inference_session_utils.h +++ b/onnxruntime/core/session/inference_session_utils.h @@ -19,7 +19,9 @@ using json = nlohmann::json; #endif namespace onnxruntime { - +namespace logging { +class Logger; +} namespace inference_session_utils { // need this value to be accessible in all builds in order to report error for attempted usage in a minimal build @@ -60,7 +62,8 @@ class JsonConfigParser { Status ParseTuningResultsFromModelMetadata(const onnxruntime::ModelMetadata& metadata, /*out*/ std::vector& results, - /*out*/ bool& key_found); + /*out*/ bool& key_found, + const logging::Logger& logger); #endif // !defined(ORT_MINIMAL_BUILD) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index d55fd34d5a8f2..c3832498af584 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -279,8 +279,9 @@ struct ProviderHostImpl : ProviderHost { std::unordered_set GetCpuPreferredNodes(const onnxruntime::GraphViewer& graph, const IExecutionProvider::IKernelLookup& kernel_lookup, - gsl::span tentative_nodes) override { - return onnxruntime::GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes); + gsl::span tentative_nodes, + const logging::Logger& logger) override { + return onnxruntime::GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes, logger); } Status UnpackTensor(const 
ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ bool* p_data, size_t expected_size) override { return utils::UnpackTensor(tensor, raw_data, raw_data_len, p_data, expected_size); } @@ -1057,8 +1058,8 @@ struct ProviderHostImpl : ProviderHost { } std::pair>, std::unordered_map> - QDQ__GetAllNodeUnits(const GraphViewer* graph_viewer) override { - return QDQ::GetAllNodeUnits(*graph_viewer); + QDQ__GetAllNodeUnits(const GraphViewer* graph_viewer, const logging::Logger& logger) override { + return QDQ::GetAllNodeUnits(*graph_viewer, logger); } // Model (wrapped) diff --git a/onnxruntime/core/session/standalone_op_invoker.cc b/onnxruntime/core/session/standalone_op_invoker.cc index 9cbf01946e92b..2706448d831cc 100644 --- a/onnxruntime/core/session/standalone_op_invoker.cc +++ b/onnxruntime/core/session/standalone_op_invoker.cc @@ -314,7 +314,8 @@ class StandAloneKernelContext : public OpKernelContext { AllocatorPtr allocator_; }; // StandAloneKernelContext -onnxruntime::Status CreateOpAttr(const char* name, const void* data, int len, OrtOpAttrType type, OrtOpAttr** op_attr) { +onnxruntime::Status CreateOpAttr(const char* name, const void* data, int len, OrtOpAttrType type, + OrtOpAttr** op_attr) { auto attr = std::make_unique(); onnxruntime::Status status = onnxruntime::Status::OK(); attr->set_name(std::string{name}); @@ -410,7 +411,9 @@ onnxruntime::Status CreateOp(_In_ const OrtKernelInfo* info, node_ptr->SetSinceVersion(version); - auto status = kernel_registry->TryFindKernel(*node_ptr, ep->Type(), type_constraint_map, &kernel_create_info); + auto status = kernel_registry->TryFindKernel(*node_ptr, ep->Type(), type_constraint_map, + logging::LoggingManager::DefaultLogger(), // no other logger available + &kernel_create_info); ORT_RETURN_IF_ERROR(status); auto& kernel_def = kernel_create_info->kernel_def; diff --git a/onnxruntime/test/framework/allocation_planner_test.cc b/onnxruntime/test/framework/allocation_planner_test.cc index a7f8a6424aa50..adab93908cdc4 100644 --- a/onnxruntime/test/framework/allocation_planner_test.cc +++ b/onnxruntime/test/framework/allocation_planner_test.cc @@ -252,6 +252,7 @@ class PlannerTest : public ::testing::Test { void BindKernel(onnxruntime::Node* p_node, ::onnxruntime::KernelDef& kernel_def, KernelRegistry* reg, std::unordered_map>& kernel_create_info_map) { + const auto& logger = DefaultLoggingManager().DefaultLogger(); const IExecutionProvider* ep = execution_providers_.Get(*p_node); ASSERT_NE(ep, nullptr); auto info = std::make_unique( @@ -261,7 +262,7 @@ class PlannerTest : public ::testing::Test { op_kernel_infos_.push_back(std::move(info)); const auto kernel_type_str_resolver = OpSchemaKernelTypeStrResolver{}; if (!KernelRegistry::HasImplementationOf(*reg, *p_node, onnxruntime::kCpuExecutionProvider, - kernel_type_str_resolver)) { + kernel_type_str_resolver, logger)) { ASSERT_STATUS_OK(reg->Register( KernelCreateInfo(std::make_unique(kernel_def), [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { @@ -271,7 +272,7 @@ class PlannerTest : public ::testing::Test { } const KernelCreateInfo* kci; - ASSERT_STATUS_OK(reg->TryFindKernel(*p_node, "", kernel_type_str_resolver, &kci)); + ASSERT_STATUS_OK(reg->TryFindKernel(*p_node, "", kernel_type_str_resolver, logger, &kci)); kernel_create_info_map.insert({p_node->Index(), gsl::not_null(kci)}); } @@ -283,7 +284,8 @@ class PlannerTest : public ::testing::Test { } } - void CreatePlan(const std::vector& outer_scope_node_args = {}, bool 
invoke_createPlan_explicityly = true) { + void CreatePlan(const std::vector& outer_scope_node_args = {}, + bool invoke_createPlan_explicityly = true) { state_.reset(new SessionState(graph_, execution_providers_, tp_.get(), nullptr, dtm_, edlm_, DefaultLoggingManager().DefaultLogger(), profiler_, *sess_options_)); EXPECT_EQ(graph_.Resolve(), Status::OK()); diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index 67d60ea3a4ff6..2ff0b599beebf 100755 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -831,7 +831,8 @@ static void VerifyConstantFoldingWithDequantizeLinear(const std::unordered_mapName() == "ConstantFolding") { ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(transformer), TransformerLevel::Level1)); @@ -4704,7 +4705,8 @@ TEST_F(GraphTransformationTests, BiasGeluSwitchedInputOrder) { // Compare results double per_sample_tolerance = 1e-3; double relative_per_sample_tolerance = 0.0; - auto ret = CompareOrtValue(optimized_fetches[0], unoptimized_fetches[0], per_sample_tolerance, relative_per_sample_tolerance, false); + auto ret = CompareOrtValue(optimized_fetches[0], unoptimized_fetches[0], + per_sample_tolerance, relative_per_sample_tolerance, false); EXPECT_EQ(ret.first, COMPARE_RESULT::SUCCESS) << ret.second; } @@ -4713,7 +4715,8 @@ static void VerifyGeluApproximation(bool is_enabled, SessionOptions& session_opt std::make_unique(CPUExecutionProviderInfo()); bool has_gelu_approximation = false; - auto transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, session_options, *e.get(), {}); + auto transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, session_options, *e.get(), + DefaultLoggingManager().DefaultLogger(), {}); for (auto& transformer : transformers) { if (transformer->Name() == "GeluApproximation") { has_gelu_approximation = true; @@ -4728,7 +4731,8 @@ TEST_F(GraphTransformationTests, DoubleQDQRemover_SessionOptionConfig) { auto verify_session_config = [&](bool is_enabled, SessionOptions& session_option) { std::unique_ptr cpu_ep = std::make_unique(CPUExecutionProviderInfo()); bool has_double_qdq_remover = false; - auto transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level1, session_option, *cpu_ep.get(), {}); + auto transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level1, session_option, *cpu_ep.get(), + DefaultLoggingManager().DefaultLogger(), {}); for (auto& transformer : transformers) { if (transformer->Name() == "DoubleQDQPairsRemover") { has_double_qdq_remover = true; diff --git a/onnxruntime/test/optimizer/graph_transform_utils_test.cc b/onnxruntime/test/optimizer/graph_transform_utils_test.cc index 66b74641e41d3..caa64560426af 100644 --- a/onnxruntime/test/optimizer/graph_transform_utils_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_utils_test.cc @@ -36,9 +36,11 @@ TEST(GraphTransformerUtilsTests, TestGenerateGraphTransformers) { std::string l2_transformer = "ConvActivationFusion"; InlinedHashSet disabled = {l1_rule1, l1_transformer, l2_transformer}; CPUExecutionProvider cpu_ep(CPUExecutionProviderInfo{}); + const auto& logger = DefaultLoggingManager().DefaultLogger(); - auto all_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level1, {}, cpu_ep); - auto filtered_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level1, {}, cpu_ep, disabled); + auto all_transformers = 
optimizer_utils::GenerateTransformers(TransformerLevel::Level1, {}, cpu_ep, logger); + auto filtered_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level1, {}, cpu_ep, logger, + disabled); // check ConstantFolding transformer was removed ASSERT_TRUE(filtered_transformers.size() == all_transformers.size() - 1); @@ -61,8 +63,9 @@ TEST(GraphTransformerUtilsTests, TestGenerateGraphTransformers) { #ifndef DISABLE_CONTRIB_OPS // check that ConvActivationFusion was removed - all_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, {}, cpu_ep); - filtered_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, {}, cpu_ep, disabled); + all_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, {}, cpu_ep, logger); + filtered_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, {}, cpu_ep, logger, + disabled); ASSERT_TRUE(filtered_transformers.size() == all_transformers.size() - 1); #endif } diff --git a/onnxruntime/test/optimizer/optimizer_test.cc b/onnxruntime/test/optimizer/optimizer_test.cc index 81c1a4ace1e33..b306f026b2dfd 100644 --- a/onnxruntime/test/optimizer/optimizer_test.cc +++ b/onnxruntime/test/optimizer/optimizer_test.cc @@ -27,6 +27,7 @@ namespace test { TEST(OptimizerTest, Basic) { Model model("OptimizerBasic", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), {{kOnnxDomain, 12}}, {}, DefaultLoggingManager().DefaultLogger()); + const logging::Logger& logger = DefaultLoggingManager().DefaultLogger(); auto& graph = model.MainGraph(); constexpr int tensor_dim = 10; @@ -66,22 +67,21 @@ TEST(OptimizerTest, Basic) { auto cpu_execution_provider = std::make_unique(CPUExecutionProviderInfo()); #if !defined(DISABLE_SPARSE_TENSORS) - OptimizerExecutionFrame::Info info(nodes, initialized_tensor_set, - graph.ModelPath(), - *cpu_execution_provider.get(), - [&graph](const std::string& name) -> bool { - return graph.IsSparseInitializer(name); - }); + OptimizerExecutionFrame::Info info( + nodes, initialized_tensor_set, graph.ModelPath(), *cpu_execution_provider.get(), + [&graph](const std::string& name) -> bool { + return graph.IsSparseInitializer(name); + }, + logger); #else - OptimizerExecutionFrame::Info info(nodes, initialized_tensor_set, - graph.ModelPath(), - *cpu_execution_provider.get(), - [](std::string const&) { return false; }); + OptimizerExecutionFrame::Info info( + nodes, initialized_tensor_set, graph.ModelPath(), *cpu_execution_provider.get(), + [](std::string const&) { return false; }, + logger); #endif //! 
defined(DISABLE_SPARSE_TENSORS) std::vector fetch_mlvalue_idxs{info.GetMLValueIndex("out")}; OptimizerExecutionFrame frame(info, fetch_mlvalue_idxs); - const logging::Logger& logger = DefaultLoggingManager().DefaultLogger(); const ConfigOptions empty_config_options; diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index cfee4a83a4292..043b92d7ef121 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -3928,6 +3928,7 @@ TEST(QDQTransformerTests, QDQPropagation_DQForward_SliceMultipleConsumers) { TEST(QDQTransformerTests, QDQ_Selector_Test) { const ORTCHAR_T* model_file_name = ORT_TSTR("testdata/transform/qdq_conv.onnx"); + const auto& logger = DefaultLoggingManager().DefaultLogger(); SessionOptions so; // We want to keep the graph un-optimized to prevent QDQ transformer to kick in @@ -3962,7 +3963,7 @@ TEST(QDQTransformerTests, QDQ_Selector_Test) { // Check if SelectorManager get a conv qdq group selection as expected { - const auto result = selector_mgr.GetQDQSelections(whole_graph_viewer); + const auto result = selector_mgr.GetQDQSelections(whole_graph_viewer, logger); ASSERT_FALSE(result.empty()); const auto& qdq_group = result.at(0); ASSERT_EQ(std::vector({0, 1, 2}), qdq_group.dq_nodes); @@ -3977,7 +3978,7 @@ TEST(QDQTransformerTests, QDQ_Selector_Test) { std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(whole_graph_viewer); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(whole_graph_viewer, logger); // We should get a single QDQ Node unit in the result ASSERT_EQ(1, node_unit_holder.size()); @@ -4045,7 +4046,7 @@ TEST(QDQTransformerTests, QDQ_Selector_Test) { // Check SelectorManager will get empty result { - const auto result = selector_mgr.GetQDQSelections(partial_graph_viewer); + const auto result = selector_mgr.GetQDQSelections(partial_graph_viewer, logger); ASSERT_TRUE(result.empty()); } } diff --git a/onnxruntime/test/providers/base_tester.cc b/onnxruntime/test/providers/base_tester.cc index 9d83c789c5124..b0958e05dc373 100644 --- a/onnxruntime/test/providers/base_tester.cc +++ b/onnxruntime/test/providers/base_tester.cc @@ -420,6 +420,7 @@ bool SetEpsForAllNodes(Graph& graph, continue; bool found = false; + const auto& logger = DefaultLoggingManager().DefaultLogger(); for (const auto& ep : execution_providers) { auto provider_type = ep->Type(); @@ -438,7 +439,8 @@ bool SetEpsForAllNodes(Graph& graph, } // Check the EP has an impl for the node from builtin registry. 
- if (KernelRegistry::HasImplementationOf(*ep->GetKernelRegistry(), node, ep->Type(), kernel_type_str_resolver)) { + if (KernelRegistry::HasImplementationOf(*ep->GetKernelRegistry(), node, ep->Type(), kernel_type_str_resolver, + logger)) { found = true; break; } @@ -451,6 +453,7 @@ bool SetEpsForAllNodes(Graph& graph, std::string_view(kMSInternalNHWCDomain), node.SinceVersion(), type_constraint_map, + logger, &kci); if (status.IsOK() && kci != nullptr) { found = true; @@ -463,7 +466,7 @@ bool SetEpsForAllNodes(Graph& graph, std::any_of(custom_registries->cbegin(), custom_registries->cend(), [&](auto reg) { return KernelRegistry::HasImplementationOf(*reg->GetKernelRegistry(), node, ep->Type(), - kernel_type_str_resolver); + kernel_type_str_resolver, logger); })) { found = true; break; diff --git a/onnxruntime/test/providers/kernel_compute_test_utils.cc b/onnxruntime/test/providers/kernel_compute_test_utils.cc index 23ec48fa649dd..93e688570631e 100644 --- a/onnxruntime/test/providers/kernel_compute_test_utils.cc +++ b/onnxruntime/test/providers/kernel_compute_test_utils.cc @@ -42,8 +42,9 @@ void KernelComputeTester::Run(std::unordered_set strided_outputs) { } #endif + const auto& logger = DefaultLoggingManager().DefaultLogger(); Model model("test", false, ModelMetaData(), ORT_TSTR(""), IOnnxRuntimeOpSchemaRegistryList(), - {{domain_, opset_version_}}, {}, DefaultLoggingManager().DefaultLogger()); + {{domain_, opset_version_}}, {}, logger); std::vector input_args; std::unordered_map initializer_map; @@ -89,8 +90,7 @@ void KernelComputeTester::Run(std::unordered_set strided_outputs) { ASSERT_STATUS_OK(graph.Resolve()); node.SetExecutionProviderType(ep_type); - OptimizerExecutionFrame::Info info({&node}, initializer_map, graph.ModelPath(), *execution_providers.Get(ep_type), - [](std::string const&) { return false; }); + OptimizerExecutionFrame::Info info({&node}, initializer_map, graph.ModelPath(), *execution_providers.Get(ep_type), [](std::string const&) { return false; }, logger); const KernelCreateInfo* kernel_create_info = nullptr; ASSERT_STATUS_OK(info.TryFindKernel(&node, &kernel_create_info)); ASSERT_TRUE(kernel_create_info); @@ -139,7 +139,7 @@ void KernelComputeTester::Run(std::unordered_set strided_outputs) { #pragma warning(disable : 6387) #endif OptimizerExecutionFrame frame(info, fetch_mlvalue_idxs, outputs); - OpKernelContext op_kernel_context(&frame, kernel.get(), nullptr, nullptr, DefaultLoggingManager().DefaultLogger()); + OpKernelContext op_kernel_context(&frame, kernel.get(), nullptr, nullptr, logger); #ifdef _WIN32 #pragma warning(pop) #endif diff --git a/onnxruntime/test/providers/partitioning_utils_test.cc b/onnxruntime/test/providers/partitioning_utils_test.cc index 5db69489afaef..f1fbb1cea7ea2 100644 --- a/onnxruntime/test/providers/partitioning_utils_test.cc +++ b/onnxruntime/test/providers/partitioning_utils_test.cc @@ -51,7 +51,7 @@ TEST(PartitioningUtilsTest, TestQDQHandling) { std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); auto result = utils::CreateSupportedPartitions(graph_viewer, is_node_supported, on_group_closed, gen_metadef_name, "TEST", kCpuExecutionProvider, &node_unit_map, @@ -82,7 +82,7 @@ static void CheckAllNodesProcessed(const std::function& std::vector> node_unit_holder; std::unordered_map node_unit_map; - std::tie(node_unit_holder, node_unit_map) = 
QDQ::GetAllNodeUnits(graph_viewer); + std::tie(node_unit_holder, node_unit_map) = QDQ::GetAllNodeUnits(graph_viewer, logger); const auto is_node_supported = [&](const Node& /*node*/) -> bool { return true; diff --git a/orttraining/orttraining/core/session/training_session.cc b/orttraining/orttraining/core/session/training_session.cc index 87a7cbc0375a4..f1545e96481fa 100644 --- a/orttraining/orttraining/core/session/training_session.cc +++ b/orttraining/orttraining/core/session/training_session.cc @@ -758,7 +758,8 @@ Status TrainingSession::AddPredefinedTransformers( GraphTransformerManager& transformer_manager, TransformerLevel graph_optimization_level, MinimalBuildOptimizationHandling minimal_build_optimization_handling, - RecordRuntimeOptimizationProducedNodeOpSchemaFn /*record_runtime_optimization_produced_op_schema_fn*/) const { + RecordRuntimeOptimizationProducedNodeOpSchemaFn /*record_runtime_optimization_produced_op_schema_fn*/, + const logging::Logger& /*logger*/) const { ORT_RETURN_IF_NOT( minimal_build_optimization_handling == MinimalBuildOptimizationHandling::ApplyFullBuildOptimizations, "Only applying full build optimizations is supported by TrainingSession."); diff --git a/orttraining/orttraining/core/session/training_session.h b/orttraining/orttraining/core/session/training_session.h index 765f88e1c992e..58492dc62400f 100644 --- a/orttraining/orttraining/core/session/training_session.h +++ b/orttraining/orttraining/core/session/training_session.h @@ -489,7 +489,8 @@ class TrainingSession : public InferenceSession { GraphTransformerManager& transformer_manager, TransformerLevel graph_optimization_level, MinimalBuildOptimizationHandling minimal_build_optimization_handling, - RecordRuntimeOptimizationProducedNodeOpSchemaFn record_runtime_optimization_produced_op_schema_fn) const override; + RecordRuntimeOptimizationProducedNodeOpSchemaFn record_runtime_optimization_produced_op_schema_fn, + const logging::Logger& logger) const override; /** Perform auto-diff to add backward graph into the model. @param weights_to_train a set of weights to be training. diff --git a/orttraining/orttraining/test/gradient/gradient_op_test_utils.cc b/orttraining/orttraining/test/gradient/gradient_op_test_utils.cc index 0944e46ff8eaf..58c173ed90277 100644 --- a/orttraining/orttraining/test/gradient/gradient_op_test_utils.cc +++ b/orttraining/orttraining/test/gradient/gradient_op_test_utils.cc @@ -139,7 +139,8 @@ void GradientOpTester::Run(int output_index_to_use_as_loss, auto reg = execution_provider->GetKernelRegistry(); const KernelCreateInfo* kci; - auto st = reg->TryFindKernel(node, execution_provider->Type(), kernel_type_str_resolver, &kci); + auto st = reg->TryFindKernel(node, execution_provider->Type(), kernel_type_str_resolver, + DefaultLoggingManager().DefaultLogger(), &kci); if (!st.IsOK()) { // The goal here is unclear. 
It seems best to leave it to the Session // creation to figure out whether the model can be executed using some diff --git a/orttraining/orttraining/test/optimizer/graph_transformer_utils_test.cc b/orttraining/orttraining/test/optimizer/graph_transformer_utils_test.cc index 548f39bb0150c..1b8699d1de497 100644 --- a/orttraining/orttraining/test/optimizer/graph_transformer_utils_test.cc +++ b/orttraining/orttraining/test/optimizer/graph_transformer_utils_test.cc @@ -23,8 +23,10 @@ TEST(GraphTransformerUtilsTestsForTraining, TestGenerateGraphTransformers) { InlinedHashSet disabled = {l1_rule1, l1_transformer, l2_transformer}; CPUExecutionProvider cpu_ep(CPUExecutionProviderInfo{}); - auto all_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level1, {}, cpu_ep); - auto filtered_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level1, {}, cpu_ep, disabled); + const auto& logger = DefaultLoggingManager().DefaultLogger(); + auto all_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level1, {}, cpu_ep, logger); + auto filtered_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level1, {}, cpu_ep, logger, + disabled); // check ConstantFolding transformer was removed ASSERT_TRUE(filtered_transformers.size() == all_transformers.size() - 1); @@ -47,8 +49,8 @@ TEST(GraphTransformerUtilsTestsForTraining, TestGenerateGraphTransformers) { #ifndef DISABLE_CONTRIB_OPS // check that ConvActivationFusion was removed - all_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, {}, cpu_ep); - filtered_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, {}, cpu_ep, disabled); + all_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, {}, cpu_ep, logger); + filtered_transformers = optimizer_utils::GenerateTransformers(TransformerLevel::Level2, {}, cpu_ep, logger, disabled); ASSERT_TRUE(filtered_transformers.size() == all_transformers.size() - 1); #endif } From b14b4ec70382a57faf2d663afc3ca1936bfd20de Mon Sep 17 00:00:00 2001 From: A-Satti <104173224+A-Satti@users.noreply.github.com> Date: Mon, 9 Dec 2024 21:52:21 -0800 Subject: [PATCH 06/13] Restore Qspectre flag (#23060) Restore a removed Qspectre flag and update comment ### Motivation and Context Adjustment for PR https://github.com/microsoft/onnxruntime/commit/f5293d253c2ccc7a96c9ec791b91e0035359f45f --- tools/ci_build/build.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index fdc85e2cafa31..6ee37b8b0519e 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1559,6 +1559,9 @@ def generate_build_tree( # The "/profile" flag implies "/DEBUG:FULL /DEBUGTYPE:cv,fixup /OPT:REF /OPT:NOICF /INCREMENTAL:NO /FIXED:NO". We set it for satisfying a Microsoft internal compliance requirement. External users # do not need to have it. ldflags = ["/profile", "/DYNAMICBASE"] + # Address Sanitizer libs do not have a Qspectre version. So they two cannot be both enabled. + if not args.enable_address_sanitizer: + cflags += ["/Qspectre"] if config == "Release": cflags += ["/O2", "/Ob2", "/DNDEBUG"] elif config == "RelWithDebInfo": From 5f7b9d024525ef0afb2c931755644933c8f75e5f Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Tue, 10 Dec 2024 13:49:03 -0500 Subject: [PATCH 07/13] Upgrade gradle to 8.7 (#23016) ### Description This PR only upgrade the gradle version and `com.android.tools.build:gradle` version from build.gradle. 
This only updates the react-native library gradle version, not the e2e test.

### Motivation and Context
---
 .../android/gradle/wrapper/gradle-wrapper.jar | Bin 60756 -> 43453 bytes
 .../gradle/wrapper/gradle-wrapper.properties  |   6 ++-
 js/react_native/android/gradlew               |  35 +++++++++++-------
 js/react_native/android/gradlew.bat           |  21 ++++++-----
 4 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/js/react_native/android/gradle/wrapper/gradle-wrapper.jar b/js/react_native/android/gradle/wrapper/gradle-wrapper.jar
index 249e5832f090a2944b7473328c07c9755baa3196..e6441136f3d4ba8a0da8d277868979cfbc8ad796 100644
GIT binary patch
literal 43453
[base85-encoded binary payload of gradle-wrapper.jar truncated]
z_TRX0%@suz^TRdCb?!vNJYPY8L_}&07uySH9%W^Tc&1pia6y1q#?*Drf}GjGbPjBS zbOPcUY#*$3sL2x4v_i*Y=N7E$mR}J%|GUI(>WEr+28+V z%v5{#e!UF*6~G&%;l*q*$V?&r$Pp^sE^i-0$+RH3ERUUdQ0>rAq2(2QAbG}$y{de( z>{qD~GGuOk559Y@%$?N^1ApVL_a704>8OD%8Y%8B;FCt%AoPu8*D1 zLB5X>b}Syz81pn;xnB}%0FnwazlWfUV)Z-~rZg6~b z6!9J$EcE&sEbzcy?CI~=boWA&eeIa%z(7SE^qgVLz??1Vbc1*aRvc%Mri)AJaAG!p z$X!_9Ds;Zz)f+;%s&dRcJt2==P{^j3bf0M=nJd&xwUGlUFn?H=2W(*2I2Gdu zv!gYCwM10aeus)`RIZSrCK=&oKaO_Ry~D1B5!y0R=%!i2*KfXGYX&gNv_u+n9wiR5 z*e$Zjju&ODRW3phN925%S(jL+bCHv6rZtc?!*`1TyYXT6%Ju=|X;6D@lq$8T zW{Y|e39ioPez(pBH%k)HzFITXHvnD6hw^lIoUMA;qAJ^CU?top1fo@s7xT13Fvn1H z6JWa-6+FJF#x>~+A;D~;VDs26>^oH0EI`IYT2iagy23?nyJ==i{g4%HrAf1-*v zK1)~@&(KkwR7TL}L(A@C_S0G;-GMDy=MJn2$FP5s<%wC)4jC5PXoxrQBFZ_k0P{{s@sz+gX`-!=T8rcB(=7vW}^K6oLWMmp(rwDh}b zwaGGd>yEy6fHv%jM$yJXo5oMAQ>c9j`**}F?MCry;T@47@r?&sKHgVe$MCqk#Z_3S z1GZI~nOEN*P~+UaFGnj{{Jo@16`(qVNtbU>O0Hf57-P>x8Jikp=`s8xWs^dAJ9lCQ z)GFm+=OV%AMVqVATtN@|vp61VVAHRn87}%PC^RAzJ%JngmZTasWBAWsoAqBU+8L8u z4A&Pe?fmTm0?mK-BL9t+{y7o(7jm+RpOhL9KnY#E&qu^}B6=K_dB}*VlSEiC9fn)+V=J;OnN)Ta5v66ic1rG+dGAJ1 z1%Zb_+!$=tQ~lxQrzv3x#CPb?CekEkA}0MYSgx$Jdd}q8+R=ma$|&1a#)TQ=l$1tQ z=tL9&_^vJ)Pk}EDO-va`UCT1m#Uty1{v^A3P~83_#v^ozH}6*9mIjIr;t3Uv%@VeW zGL6(CwCUp)Jq%G0bIG%?{_*Y#5IHf*5M@wPo6A{$Um++Co$wLC=J1aoG93&T7Ho}P z=mGEPP7GbvoG!uD$k(H3A$Z))+i{Hy?QHdk>3xSBXR0j!11O^mEe9RHmw!pvzv?Ua~2_l2Yh~_!s1qS`|0~0)YsbHSz8!mG)WiJE| z2f($6TQtt6L_f~ApQYQKSb=`053LgrQq7G@98#igV>y#i==-nEjQ!XNu9 z~;mE+gtj4IDDNQJ~JVk5Ux6&LCSFL!y=>79kE9=V}J7tD==Ga+IW zX)r7>VZ9dY=V&}DR))xUoV!u(Z|%3ciQi_2jl}3=$Agc(`RPb z8kEBpvY>1FGQ9W$n>Cq=DIpski};nE)`p3IUw1Oz0|wxll^)4dq3;CCY@RyJgFgc# zKouFh!`?Xuo{IMz^xi-h=StCis_M7yq$u) z?XHvw*HP0VgR+KR6wI)jEMX|ssqYvSf*_3W8zVTQzD?3>H!#>InzpSO)@SC8q*ii- z%%h}_#0{4JG;Jm`4zg};BPTGkYamx$Xo#O~lBirRY)q=5M45n{GCfV7h9qwyu1NxOMoP4)jjZMxmT|IQQh0U7C$EbnMN<3)Kk?fFHYq$d|ICu>KbY_hO zTZM+uKHe(cIZfEqyzyYSUBZa8;Fcut-GN!HSA9ius`ltNebF46ZX_BbZNU}}ZOm{M2&nANL9@0qvih15(|`S~z}m&h!u4x~(%MAO$jHRWNfuxWF#B)E&g3ghSQ9|> z(MFaLQj)NE0lowyjvg8z0#m6FIuKE9lDO~Glg}nSb7`~^&#(Lw{}GVOS>U)m8bF}x zVjbXljBm34Cs-yM6TVusr+3kYFjr28STT3g056y3cH5Tmge~ASxBj z%|yb>$eF;WgrcOZf569sDZOVwoo%8>XO>XQOX1OyN9I-SQgrm;U;+#3OI(zrWyow3 zk==|{lt2xrQ%FIXOTejR>;wv(Pb8u8}BUpx?yd(Abh6? zsoO3VYWkeLnF43&@*#MQ9-i-d0t*xN-UEyNKeyNMHw|A(k(_6QKO=nKMCxD(W(Yop zsRQ)QeL4X3Lxp^L%wzi2-WVSsf61dqliPUM7srDB?Wm6Lzn0&{*}|IsKQW;02(Y&| zaTKv|`U(pSzuvR6Rduu$wzK_W-Y-7>7s?G$)U}&uK;<>vU}^^ns@Z!p+9?St1s)dG zK%y6xkPyyS1$~&6v{kl?Md6gwM|>mt6Upm>oa8RLD^8T{0?HC!Z>;(Bob7el(DV6x zi`I)$&E&ngwFS@bi4^xFLAn`=fzTC;aimE^!cMI2n@Vo%Ae-ne`RF((&5y6xsjjAZ zVguVoQ?Z9uk$2ON;ersE%PU*xGO@T*;j1BO5#TuZKEf(mB7|g7pcEA=nYJ{s3vlbg zd4-DUlD{*6o%Gc^N!Nptgay>j6E5;3psI+C3Q!1ZIbeCubW%w4pq9)MSDyB{HLm|k zxv-{$$A*pS@csolri$Ge<4VZ}e~78JOL-EVyrbxKra^d{?|NnPp86!q>t<&IP07?Z z^>~IK^k#OEKgRH+LjllZXk7iA>2cfH6+(e&9ku5poo~6y{GC5>(bRK7hwjiurqAiZ zg*DmtgY}v83IjE&AbiWgMyFbaRUPZ{lYiz$U^&Zt2YjG<%m((&_JUbZcfJ22(>bi5 z!J?<7AySj0JZ&<-qXX;mcV!f~>G=sB0KnjWca4}vrtunD^1TrpfeS^4dvFr!65knK zZh`d;*VOkPs4*-9kL>$GP0`(M!j~B;#x?Ba~&s6CopvO86oM?-? 
zOw#dIRc;6A6T?B`Qp%^<U5 z19x(ywSH$_N+Io!6;e?`tWaM$`=Db!gzx|lQ${DG!zb1Zl&|{kX0y6xvO1o z220r<-oaS^^R2pEyY;=Qllqpmue|5yI~D|iI!IGt@iod{Opz@*ml^w2bNs)p`M(Io z|E;;m*Xpjd9l)4G#KaWfV(t8YUn@A;nK^#xgv=LtnArX|vWQVuw3}B${h+frU2>9^ z!l6)!Uo4`5k`<<;E(ido7M6lKTgWezNLq>U*=uz&s=cc$1%>VrAeOoUtA|T6gO4>UNqsdK=NF*8|~*sl&wI=x9-EGiq*aqV!(VVXA57 zw9*o6Ir8Lj1npUXvlevtn(_+^X5rzdR>#(}4YcB9O50q97%rW2me5_L=%ffYPUSRc z!vv?Kv>dH994Qi>U(a<0KF6NH5b16enCp+mw^Hb3Xs1^tThFpz!3QuN#}KBbww`(h z7GO)1olDqy6?T$()R7y%NYx*B0k_2IBiZ14&8|JPFxeMF{vSTxF-Vi3+ZOI=Thq2} zyQgjYY1_7^ZQHh{?P))4+qUiQJLi1&{yE>h?~jU%tjdV0h|FENbM3X(KnJdPKc?~k zh=^Ixv*+smUll!DTWH!jrV*wSh*(mx0o6}1@JExzF(#9FXgmTXVoU+>kDe68N)dkQ zH#_98Zv$}lQwjKL@yBd;U(UD0UCl322=pav<=6g>03{O_3oKTq;9bLFX1ia*lw;#K zOiYDcBJf)82->83N_Y(J7Kr_3lE)hAu;)Q(nUVydv+l+nQ$?|%MWTy`t>{havFSQloHwiIkGK9YZ79^9?AZo0ZyQlVR#}lF%dn5n%xYksXf8gnBm=wO7g_^! zauQ-bH1Dc@3ItZ-9D_*pH}p!IG7j8A_o94#~>$LR|TFq zZ-b00*nuw|-5C2lJDCw&8p5N~Z1J&TrcyErds&!l3$eSz%`(*izc;-?HAFD9AHb-| z>)id`QCrzRws^9(#&=pIx9OEf2rmlob8sK&xPCWS+nD~qzU|qG6KwA{zbikcfQrdH z+ zQg>O<`K4L8rN7`GJB0*3<3`z({lWe#K!4AZLsI{%z#ja^OpfjU{!{)x0ZH~RB0W5X zTwN^w=|nA!4PEU2=LR05x~}|B&ZP?#pNgDMwD*ajI6oJqv!L81gu=KpqH22avXf0w zX3HjbCI!n9>l046)5rr5&v5ja!xkKK42zmqHzPx$9Nn_MZk`gLeSLgC=LFf;H1O#B zn=8|^1iRrujHfbgA+8i<9jaXc;CQBAmQvMGQPhFec2H1knCK2x!T`e6soyrqCamX% zTQ4dX_E*8so)E*TB$*io{$c6X)~{aWfaqdTh=xEeGvOAN9H&-t5tEE-qso<+C!2>+ zskX51H-H}#X{A75wqFe-J{?o8Bx|>fTBtl&tcbdR|132Ztqu5X0i-pisB-z8n71%q%>EF}yy5?z=Ve`}hVh{Drv1YWL zW=%ug_&chF11gDv3D6B)Tz5g54H0mDHNjuKZ+)CKFk4Z|$RD zfRuKLW`1B>B?*RUfVd0+u8h3r-{@fZ{k)c!93t1b0+Q9vOaRnEn1*IL>5Z4E4dZ!7 ztp4GP-^1d>8~LMeb}bW!(aAnB1tM_*la=Xx)q(I0Y@__Zd$!KYb8T2VBRw%e$iSdZ zkwdMwd}eV9q*;YvrBFTv1>1+}{H!JK2M*C|TNe$ZSA>UHKk);wz$(F$rXVc|sI^lD zV^?_J!3cLM;GJuBMbftbaRUs$;F}HDEDtIeHQ)^EJJ1F9FKJTGH<(Jj`phE6OuvE) zqK^K`;3S{Y#1M@8yRQwH`?kHMq4tHX#rJ>5lY3DM#o@or4&^_xtBC(|JpGTfrbGkA z2Tu+AyT^pHannww!4^!$5?@5v`LYy~T`qs7SYt$JgrY(w%C+IWA;ZkwEF)u5sDvOK zGk;G>Mh&elvXDcV69J_h02l&O;!{$({fng9Rlc3ID#tmB^FIG^w{HLUpF+iB`|
NnX)EH+Nua)3Y(c z&{(nX_ht=QbJ%DzAya}!&uNu!4V0xI)QE$SY__m)SAKcN0P(&JcoK*Lxr@P zY&P=}&B3*UWNlc|&$Oh{BEqwK2+N2U$4WB7Fd|aIal`FGANUa9E-O)!gV`((ZGCc$ zBJA|FFrlg~9OBp#f7aHodCe{6= zay$6vN~zj1ddMZ9gQ4p32(7wD?(dE>KA2;SOzXRmPBiBc6g`eOsy+pVcHu=;Yd8@{ zSGgXf@%sKKQz~;!J;|2fC@emm#^_rnO0esEn^QxXgJYd`#FPWOUU5b;9eMAF zZhfiZb|gk8aJIw*YLp4!*(=3l8Cp{(%p?ho22*vN9+5NLV0TTazNY$B5L6UKUrd$n zjbX%#m7&F#U?QNOBXkiiWB*_tk+H?N3`vg;1F-I+83{M2!8<^nydGr5XX}tC!10&e z7D36bLaB56WrjL&HiiMVtpff|K%|*{t*ltt^5ood{FOG0<>k&1h95qPio)2`eL${YAGIx(b4VN*~nKn6E~SIQUuRH zQ+5zP6jfnP$S0iJ@~t!Ai3o`X7biohli;E zT#yXyl{bojG@-TGZzpdVDXhbmF%F9+-^YSIv|MT1l3j zrxOFq>gd2%U}?6}8mIj?M zc077Zc9fq(-)4+gXv?Az26IO6eV`RAJz8e3)SC7~>%rlzDwySVx*q$ygTR5kW2ds- z!HBgcq0KON9*8Ff$X0wOq$`T7ml(@TF)VeoF}x1OttjuVHn3~sHrMB++}f7f9H%@f z=|kP_?#+fve@{0MlbkC9tyvQ_R?lRdRJ@$qcB(8*jyMyeME5ns6ypVI1Xm*Zr{DuS zZ!1)rQfa89c~;l~VkCiHI|PCBd`S*2RLNQM8!g9L6?n`^evQNEwfO@&JJRme+uopQX0%Jo zgd5G&#&{nX{o?TQwQvF1<^Cg3?2co;_06=~Hcb6~4XWpNFL!WU{+CK;>gH%|BLOh7@!hsa(>pNDAmpcuVO-?;Bic17R}^|6@8DahH)G z!EmhsfunLL|3b=M0MeK2vqZ|OqUqS8npxwge$w-4pFVXFq$_EKrZY?BuP@Az@(k`L z`ViQBSk`y+YwRT;&W| z2e3UfkCo^uTA4}Qmmtqs+nk#gNr2W4 zTH%hhErhB)pkXR{B!q5P3-OM+M;qu~f>}IjtF%>w{~K-0*jPVLl?Chz&zIdxp}bjx zStp&Iufr58FTQ36AHU)0+CmvaOpKF;W@sMTFpJ`j;3d)J_$tNQI^c<^1o<49Z(~K> z;EZTBaVT%14(bFw2ob@?JLQ2@(1pCdg3S%E4*dJ}dA*v}_a4_P(a`cHnBFJxNobAv zf&Zl-Yt*lhn-wjZsq<9v-IsXxAxMZ58C@e0!rzhJ+D@9^3~?~yllY^s$?&oNwyH!#~6x4gUrfxplCvK#!f z$viuszW>MFEcFL?>ux*((!L$;R?xc*myjRIjgnQX79@UPD$6Dz0jutM@7h_pq z0Zr)#O<^y_K6jfY^X%A-ip>P%3saX{!v;fxT-*0C_j4=UMH+Xth(XVkVGiiKE#f)q z%Jp=JT)uy{&}Iq2E*xr4YsJ5>w^=#-mRZ4vPXpI6q~1aFwi+lQcimO45V-JXP;>(Q zo={U`{=_JF`EQj87Wf}{Qy35s8r1*9Mxg({CvOt}?Vh9d&(}iI-quvs-rm~P;eRA@ zG5?1HO}puruc@S{YNAF3vmUc2B4!k*yi))<5BQmvd3tr}cIs#9)*AX>t`=~{f#Uz0 z0&Nk!7sSZwJe}=)-R^$0{yeS!V`Dh7w{w5rZ9ir!Z7Cd7dwZcK;BT#V0bzTt>;@Cl z#|#A!-IL6CZ@eHH!CG>OO8!%G8&8t4)Ro@}USB*k>oEUo0LsljsJ-%5Mo^MJF2I8- z#v7a5VdJ-Cd%(a+y6QwTmi+?f8Nxtm{g-+WGL>t;s#epv7ug>inqimZCVm!uT5Pf6 ziEgQt7^%xJf#!aPWbuC_3Nxfb&CFbQy!(8ANpkWLI4oSnH?Q3f?0k1t$3d+lkQs{~(>06l&v|MpcFsyAv zin6N!-;pggosR*vV=DO(#+}4ps|5$`udE%Kdmp?G7B#y%H`R|i8skKOd9Xzx8xgR$>Zo2R2Ytktq^w#ul4uicxW#{ zFjG_RNlBroV_n;a7U(KIpcp*{M~e~@>Q#Av90Jc5v%0c>egEdY4v3%|K1XvB{O_8G zkTWLC>OZKf;XguMH2-Pw{BKbFzaY;4v2seZV0>^7Q~d4O=AwaPhP3h|!hw5aqOtT@ z!SNz}$of**Bl3TK209@F=Tn1+mgZa8yh(Png%Zd6Mt}^NSjy)etQrF zme*llAW=N_8R*O~d2!apJnF%(JcN??=`$qs3Y+~xs>L9x`0^NIn!8mMRFA_tg`etw z3k{9JAjnl@ygIiJcNHTy02GMAvBVqEss&t2<2mnw!; zU`J)0>lWiqVqo|ex7!+@0i>B~BSU1A_0w#Ee+2pJx0BFiZ7RDHEvE*ptc9md(B{&+ zKE>TM)+Pd>HEmdJao7U@S>nL(qq*A)#eLOuIfAS@j`_sK0UEY6OAJJ-kOrHG zjHx`g!9j*_jRcJ%>CE9K2MVf?BUZKFHY?EpV6ai7sET-tqk=nDFh-(65rhjtlKEY% z@G&cQ<5BKatfdA1FKuB=i>CCC5(|9TMW%K~GbA4}80I5%B}(gck#Wlq@$nO3%@QP_ z8nvPkJFa|znk>V92cA!K1rKtr)skHEJD;k8P|R8RkCq1Rh^&}Evwa4BUJz2f!2=MH zo4j8Y$YL2313}H~F7@J7mh>u%556Hw0VUOz-Un@ZASCL)y8}4XXS`t1AC*^>PLwIc zUQok5PFS=*#)Z!3JZN&eZ6ZDP^-c@StY*t20JhCnbMxXf=LK#;`4KHEqMZ-Ly9KsS zI2VUJGY&PmdbM+iT)zek)#Qc#_i4uH43 z@T5SZBrhNCiK~~esjsO9!qBpaWK<`>!-`b71Y5ReXQ4AJU~T2Njri1CEp5oKw;Lnm)-Y@Z3sEY}XIgSy%xo=uek(kAAH5MsV$V3uTUsoTzxp_rF=tx zV07vlJNKtJhCu`b}*#m&5LV4TAE&%KtHViDAdv#c^x`J7bg z&N;#I2GkF@SIGht6p-V}`!F_~lCXjl1BdTLIjD2hH$J^YFN`7f{Q?OHPFEM$65^!u zNwkelo*5+$ZT|oQ%o%;rBX$+?xhvjb)SHgNHE_yP%wYkkvXHS{Bf$OiKJ5d1gI0j< zF6N}Aq=(WDo(J{e-uOecxPD>XZ@|u-tgTR<972`q8;&ZD!cep^@B5CaqFz|oU!iFj zU0;6fQX&~15E53EW&w1s9gQQ~Zk16X%6 zjG`j0yq}4deX2?Tr(03kg>C(!7a|b9qFI?jcE^Y>-VhudI@&LI6Qa}WQ>4H_!UVyF z((cm&!3gmq@;BD#5P~0;_2qgZhtJS|>WdtjY=q zLnHH~Fm!cxw|Z?Vw8*~?I$g#9j&uvgm7vPr#&iZgPP~v~BI4jOv;*OQ?jYJtzO<^y 
z7-#C={r7CO810!^s(MT!@@Vz_SVU)7VBi(e1%1rvS!?PTa}Uv`J!EP3s6Y!xUgM^8 z4f!fq<3Wer_#;u!5ECZ|^c1{|q_lh3m^9|nsMR1#Qm|?4Yp5~|er2?W^7~cl;_r4WSme_o68J9p03~Hc%X#VcX!xAu%1`R!dfGJCp zV*&m47>s^%Ib0~-2f$6oSgn3jg8m%UA;ArcdcRyM5;}|r;)?a^D*lel5C`V5G=c~k zy*w_&BfySOxE!(~PI$*dwG><+-%KT5p?whOUMA*k<9*gi#T{h3DAxzAPxN&Xws8o9Cp*`PA5>d9*Z-ynV# z9yY*1WR^D8|C%I@vo+d8r^pjJ$>eo|j>XiLWvTWLl(^;JHCsoPgem6PvegHb-OTf| zvTgsHSa;BkbG=(NgPO|CZu9gUCGr$8*EoH2_Z#^BnxF0yM~t`|9ws_xZ8X8iZYqh! zAh;HXJ)3P&)Q0(&F>!LN0g#bdbis-cQxyGn9Qgh`q+~49Fqd2epikEUw9caM%V6WgP)532RMRW}8gNS%V%Hx7apSz}tn@bQy!<=lbhmAH=FsMD?leawbnP5BWM0 z5{)@EEIYMu5;u)!+HQWhQ;D3_Cm_NADNeb-f56}<{41aYq8p4=93d=-=q0Yx#knGYfXVt z+kMxlus}t2T5FEyCN~!}90O_X@@PQpuy;kuGz@bWft%diBTx?d)_xWd_-(!LmVrh**oKg!1CNF&LX4{*j|) zIvjCR0I2UUuuEXh<9}oT_zT#jOrJAHNLFT~Ilh9hGJPI1<5`C-WA{tUYlyMeoy!+U zhA#=p!u1R7DNg9u4|QfED-2TuKI}>p#2P9--z;Bbf4Op*;Q9LCbO&aL2i<0O$ByoI z!9;Ght733FC>Pz>$_mw(F`zU?`m@>gE`9_p*=7o=7av`-&ifU(^)UU`Kg3Kw`h9-1 z6`e6+im=|m2v`pN(2dE%%n8YyQz;#3Q-|x`91z?gj68cMrHl}C25|6(_dIGk*8cA3 zRHB|Nwv{@sP4W+YZM)VKI>RlB`n=Oj~Rzx~M+Khz$N$45rLn6k1nvvD^&HtsMA4`s=MmuOJID@$s8Ph4E zAmSV^+s-z8cfv~Yd(40Sh4JG#F~aB>WFoX7ykaOr3JaJ&Lb49=B8Vk-SQT9%7TYhv z?-Pprt{|=Y5ZQ1?od|A<_IJU93|l4oAfBm?3-wk{O<8ea+`}u%(kub(LFo2zFtd?4 zwpN|2mBNywv+d^y_8#<$r>*5+$wRTCygFLcrwT(qc^n&@9r+}Kd_u@Ithz(6Qb4}A zWo_HdBj#V$VE#l6pD0a=NfB0l^6W^g`vm^sta>Tly?$E&{F?TTX~DsKF~poFfmN%2 z4x`Dc{u{Lkqz&y!33;X}weD}&;7p>xiI&ZUb1H9iD25a(gI|`|;G^NwJPv=1S5e)j z;U;`?n}jnY6rA{V^ zxTd{bK)Gi^odL3l989DQlN+Zs39Xe&otGeY(b5>rlIqfc7Ap4}EC?j<{M=hlH{1+d zw|c}}yx88_xQr`{98Z!d^FNH77=u(p-L{W6RvIn40f-BldeF-YD>p6#)(Qzf)lfZj z?3wAMtPPp>vMehkT`3gToPd%|D8~4`5WK{`#+}{L{jRUMt zrFz+O$C7y8$M&E4@+p+oV5c%uYzbqd2Y%SSgYy#xh4G3hQv>V*BnuKQhBa#=oZB~w{azUB+q%bRe_R^ z>fHBilnRTUfaJ201czL8^~Ix#+qOHSO)A|xWLqOxB$dT2W~)e-r9;bm=;p;RjYahB z*1hegN(VKK+ztr~h1}YP@6cfj{e#|sS`;3tJhIJK=tVJ-*h-5y9n*&cYCSdg#EHE# zSIx=r#qOaLJoVVf6v;(okg6?*L_55atl^W(gm^yjR?$GplNP>BZsBYEf_>wM0Lc;T zhf&gpzOWNxS>m+mN92N0{;4uw`P+9^*|-1~$uXpggj4- z^SFc4`uzj2OwdEVT@}Q`(^EcQ_5(ZtXTql*yGzdS&vrS_w>~~ra|Nb5abwf}Y!uq6R5f&6g2ge~2p(%c< z@O)cz%%rr4*cRJ5f`n@lvHNk@lE1a*96Kw6lJ~B-XfJW%?&-y?;E&?1AacU@`N`!O z6}V>8^%RZ7SQnZ-z$(jsX`amu*5Fj8g!3RTRwK^`2_QHe;_2y_n|6gSaGyPmI#kA0sYV<_qOZc#-2BO%hX)f$s-Z3xlI!ub z^;3ru11DA`4heAu%}HIXo&ctujzE2!6DIGE{?Zs>2}J+p&C$rc7gJC35gxhflorvsb%sGOxpuWhF)dL_&7&Z99=5M0b~Qa;Mo!j&Ti_kXW!86N%n= zSC@6Lw>UQ__F&+&Rzv?gscwAz8IP!n63>SP)^62(HK98nGjLY2*e^OwOq`3O|C92? z;TVhZ2SK%9AGW4ZavTB9?)mUbOoF`V7S=XM;#3EUpR+^oHtdV!GK^nXzCu>tpR|89 zdD{fnvCaN^^LL%amZ^}-E+214g&^56rpdc@yv0b<3}Ys?)f|fXN4oHf$six)-@<;W&&_kj z-B}M5U*1sb4)77aR=@%I?|Wkn-QJVuA96an25;~!gq(g1@O-5VGo7y&E_srxL6ZfS z*R%$gR}dyONgju*D&?geiSj7SZ@ftyA|}(*Y4KbvU!YLsi1EDQQCnb+-cM=K1io78o!v*);o<XwjaQH%)uIP&Zm?)Nfbfn;jIr z)d#!$gOe3QHp}2NBak@yYv3m(CPKkwI|{;d=gi552u?xj9ObCU^DJFQp4t4e1tPzM zvsRIGZ6VF+{6PvqsplMZWhz10YwS={?`~O0Ec$`-!klNUYtzWA^f9m7tkEzCy<_nS z=&<(awFeZvt51>@o_~>PLs05CY)$;}Oo$VDO)?l-{CS1Co=nxjqben*O1BR>#9`0^ zkwk^k-wcLCLGh|XLjdWv0_Hg54B&OzCE^3NCP}~OajK-LuRW53CkV~Su0U>zN%yQP zH8UH#W5P3-!ToO-2k&)}nFe`t+mdqCxxAHgcifup^gKpMObbox9LFK;LP3}0dP-UW z?Zo*^nrQ6*$FtZ(>kLCc2LY*|{!dUn$^RW~m9leoF|@Jy|M5p-G~j%+P0_#orRKf8 zvuu5<*XO!B?1E}-*SY~MOa$6c%2cM+xa8}_8x*aVn~57v&W(0mqN1W`5a7*VN{SUH zXz98DDyCnX2EPl-`Lesf`=AQT%YSDb`$%;(jUTrNen$NPJrlpPDP}prI>Ml!r6bCT;mjsg@X^#&<}CGf0JtR{Ecwd&)2zuhr#nqdgHj+g2n}GK9CHuwO zk>oZxy{vcOL)$8-}L^iVfJHAGfwN$prHjYV0ju}8%jWquw>}_W6j~m<}Jf!G?~r5&Rx)!9JNX!ts#SGe2HzobV5); zpj@&`cNcO&q+%*<%D7za|?m5qlmFK$=MJ_iv{aRs+BGVrs)98BlN^nMr{V_fcl_;jkzRju+c-y?gqBC_@J0dFLq-D9@VN&-`R9U;nv$Hg?>$oe4N&Ht$V_(JR3TG^! 
zzJsbQbi zFE6-{#9{G{+Z}ww!ycl*7rRdmU#_&|DqPfX3CR1I{Kk;bHwF6jh0opI`UV2W{*|nn zf_Y@%wW6APb&9RrbEN=PQRBEpM(N1w`81s=(xQj6 z-eO0k9=Al|>Ej|Mw&G`%q8e$2xVz1v4DXAi8G};R$y)ww638Y=9y$ZYFDM$}vzusg zUf+~BPX>(SjA|tgaFZr_e0{)+z9i6G#lgt=F_n$d=beAt0Sa0a7>z-?vcjl3e+W}+ z1&9=|vC=$co}-Zh*%3588G?v&U7%N1Qf-wNWJ)(v`iO5KHSkC5&g7CrKu8V}uQGcfcz zmBz#Lbqwqy#Z~UzHgOQ;Q-rPxrRNvl(&u6ts4~0=KkeS;zqURz%!-ERppmd%0v>iRlEf+H$yl{_8TMJzo0 z>n)`On|7=WQdsqhXI?#V{>+~}qt-cQbokEbgwV3QvSP7&hK4R{Z{aGHVS3;+h{|Hz z6$Js}_AJr383c_+6sNR|$qu6dqHXQTc6?(XWPCVZv=)D#6_;D_8P-=zOGEN5&?~8S zl5jQ?NL$c%O)*bOohdNwGIKM#jSAC?BVY={@A#c9GmX0=T(0G}xs`-%f3r=m6-cpK z!%waekyAvm9C3%>sixdZj+I(wQlbB4wv9xKI*T13DYG^T%}zZYJ|0$Oj^YtY+d$V$ zAVudSc-)FMl|54n=N{BnZTM|!>=bhaja?o7s+v1*U$!v!qQ%`T-6fBvmdPbVmro&d zk07TOp*KuxRUSTLRrBj{mjsnF8`d}rMViY8j`jo~Hp$fkv9F_g(jUo#Arp;Xw0M$~ zRIN!B22~$kx;QYmOkos@%|5k)!QypDMVe}1M9tZfkpXKGOxvKXB!=lo`p?|R1l=tA zp(1}c6T3Fwj_CPJwVsYtgeRKg?9?}%oRq0F+r+kdB=bFUdVDRPa;E~~>2$w}>O>v=?|e>#(-Lyx?nbg=ckJ#5U6;RT zNvHhXk$P}m9wSvFyU3}=7!y?Y z=fg$PbV8d7g25&-jOcs{%}wTDKm>!Vk);&rr;O1nvO0VrU&Q?TtYVU=ir`te8SLlS zKSNmV=+vF|ATGg`4$N1uS|n??f}C_4Sz!f|4Ly8#yTW-FBfvS48Tef|-46C(wEO_%pPhUC5$-~Y?!0vFZ^Gu`x=m7X99_?C-`|h zfmMM&Y@zdfitA@KPw4Mc(YHcY1)3*1xvW9V-r4n-9ZuBpFcf{yz+SR{ zo$ZSU_|fgwF~aakGr(9Be`~A|3)B=9`$M-TWKipq-NqRDRQc}ABo*s_5kV%doIX7LRLRau_gd@Rd_aLFXGSU+U?uAqh z8qusWWcvgQ&wu{|sRXmv?sl=xc<$6AR$+cl& zFNh5q1~kffG{3lDUdvEZu5c(aAG~+64FxdlfwY^*;JSS|m~CJusvi-!$XR`6@XtY2 znDHSz7}_Bx7zGq-^5{stTRy|I@N=>*y$zz>m^}^{d&~h;0kYiq8<^Wq7Dz0w31ShO^~LUfW6rfitR0(=3;Uue`Y%y@ex#eKPOW zO~V?)M#AeHB2kovn1v=n^D?2{2jhIQd9t|_Q+c|ZFaWt+r&#yrOu-!4pXAJuxM+Cx z*H&>eZ0v8Y`t}8{TV6smOj=__gFC=eah)mZt9gwz>>W$!>b3O;Rm^Ig*POZP8Rl0f zT~o=Nu1J|lO>}xX&#P58%Yl z83`HRs5#32Qm9mdCrMlV|NKNC+Z~ z9OB8xk5HJ>gBLi+m@(pvpw)1(OaVJKs*$Ou#@Knd#bk+V@y;YXT?)4eP9E5{J%KGtYinNYJUH9PU3A}66c>Xn zZ{Bn0<;8$WCOAL$^NqTjwM?5d=RHgw3!72WRo0c;+houoUA@HWLZM;^U$&sycWrFd zE7ekt9;kb0`lps{>R(}YnXlyGY}5pPd9zBpgXeJTY_jwaJGSJQC#-KJqmh-;ad&F- z-Y)E>!&`Rz!HtCz>%yOJ|v(u7P*I$jqEY3}(Z-orn4 zlI?CYKNl`6I){#2P1h)y(6?i;^z`N3bxTV%wNvQW+eu|x=kbj~s8rhCR*0H=iGkSj zk23lr9kr|p7#qKL=UjgO`@UnvzU)`&fI>1Qs7ubq{@+lK{hH* zvl6eSb9%yngRn^T<;jG1SVa)eA>T^XX=yUS@NCKpk?ovCW1D@!=@kn;l_BrG;hOTC z6K&H{<8K#dI(A+zw-MWxS+~{g$tI7|SfP$EYKxA}LlVO^sT#Oby^grkdZ^^lA}uEF zBSj$weBJG{+Bh@Yffzsw=HyChS(dtLE3i*}Zj@~!_T-Ay7z=B)+*~3|?w`Zd)Co2t zC&4DyB!o&YgSw+fJn6`sn$e)29`kUwAc+1MND7YjV%lO;H2}fNy>hD#=gT ze+-aFNpyKIoXY~Vq-}OWPBe?Rfu^{ps8>Xy%42r@RV#*QV~P83jdlFNgkPN=T|Kt7 zV*M`Rh*30&AWlb$;ae130e@}Tqi3zx2^JQHpM>j$6x`#{mu%tZlwx9Gj@Hc92IuY* zarmT|*d0E~vt6<+r?W^UW0&#U&)8B6+1+;k^2|FWBRP9?C4Rk)HAh&=AS8FS|NQaZ z2j!iZ)nbEyg4ZTp-zHwVlfLC~tXIrv(xrP8PAtR{*c;T24ycA-;auWsya-!kF~CWZ zw_uZ|%urXgUbc@x=L=_g@QJ@m#5beS@6W195Hn7>_}z@Xt{DIEA`A&V82bc^#!q8$ zFh?z_Vn|ozJ;NPd^5uu(9tspo8t%&-U9Ckay-s@DnM*R5rtu|4)~e)`z0P-sy?)kc zs_k&J@0&0!q4~%cKL)2l;N*T&0;mqX5T{Qy60%JtKTQZ-xb%KOcgqwJmb%MOOKk7N zgq})R_6**{8A|6H?fO+2`#QU)p$Ei2&nbj6TpLSIT^D$|`TcSeh+)}VMb}LmvZ{O| ze*1IdCt3+yhdYVxcM)Q_V0bIXLgr6~%JS<<&dxIgfL=Vnx4YHuU@I34JXA|+$_S3~ zy~X#gO_X!cSs^XM{yzDGNM>?v(+sF#<0;AH^YrE8smx<36bUsHbN#y57K8WEu(`qHvQ6cAZPo=J5C(lSmUCZ57Rj6cx!e^rfaI5%w}unz}4 zoX=nt)FVNV%QDJH`o!u9olLD4O5fl)xp+#RloZlaA92o3x4->?rB4`gS$;WO{R;Z3>cG3IgFX2EA?PK^M}@%1%A;?f6}s&CV$cIyEr#q5;yHdNZ9h{| z-=dX+a5elJoDo?Eq&Og!nN6A)5yYpnGEp}?=!C-V)(*~z-+?kY1Q7qs#Rsy%hu_60rdbB+QQNr?S1 z?;xtjUv|*E3}HmuNyB9aFL5H~3Ho0UsmuMZELp1a#CA1g`P{-mT?BchuLEtK}!QZ=3AWakRu~?f9V~3F;TV`5%9Pcs_$gq&CcU}r8gOO zC2&SWPsSG{&o-LIGTBqp6SLQZPvYKp$$7L4WRRZ0BR$Kf0I0SCFkqveCp@f)o8W)! 
z$%7D1R`&j7W9Q9CGus_)b%+B#J2G;l*FLz#s$hw{BHS~WNLODV#(!u_2Pe&tMsq={ zdm7>_WecWF#D=?eMjLj=-_z`aHMZ=3_-&E8;ibPmM}61i6J3is*=dKf%HC>=xbj4$ zS|Q-hWQ8T5mWde6h@;mS+?k=89?1FU<%qH9B(l&O>k|u_aD|DY*@~(`_pb|B#rJ&g zR0(~(68fpUPz6TdS@4JT5MOPrqDh5_H(eX1$P2SQrkvN8sTxwV>l0)Qq z0pzTuvtEAKRDkKGhhv^jk%|HQ1DdF%5oKq5BS>szk-CIke{%js?~%@$uaN3^Uz6Wf z_iyx{bZ(;9y4X&>LPV=L=d+A}7I4GkK0c1Xts{rrW1Q7apHf-))`BgC^0^F(>At1* za@e7{lq%yAkn*NH8Q1{@{lKhRg*^TfGvv!Sn*ed*x@6>M%aaqySxR|oNadYt1mpUZ z6H(rupHYf&Z z29$5g#|0MX#aR6TZ$@eGxxABRKakDYtD%5BmKp;HbG_ZbT+=81E&=XRk6m_3t9PvD zr5Cqy(v?gHcYvYvXkNH@S#Po~q(_7MOuCAB8G$a9BC##gw^5mW16cML=T=ERL7wsk zzNEayTG?mtB=x*wc@ifBCJ|irFVMOvH)AFRW8WE~U()QT=HBCe@s$dA9O!@`zAAT) zaOZ7l6vyR+Nk_OOF!ZlZmjoImKh)dxFbbR~z(cMhfeX1l7S_`;h|v3gI}n9$sSQ>+3@AFAy9=B_y$)q;Wdl|C-X|VV3w8 z2S#>|5dGA8^9%Bu&fhmVRrTX>Z7{~3V&0UpJNEl0=N32euvDGCJ>#6dUSi&PxFW*s zS`}TB>?}H(T2lxBJ!V#2taV;q%zd6fOr=SGHpoSG*4PDaiG0pdb5`jelVipkEk%FV zThLc@Hc_AL1#D&T4D=w@UezYNJ%0=f3iVRuVL5H?eeZM}4W*bomebEU@e2d`M<~uW zf#Bugwf`VezG|^Qbt6R_=U0}|=k;mIIakz99*>FrsQR{0aQRP6ko?5<7bkDN8evZ& zB@_KqQG?ErKL=1*ZM9_5?Pq%lcS4uLSzN(Mr5=t6xHLS~Ym`UgM@D&VNu8e?_=nSFtF$u@hpPSmI4Vo_t&v?>$~K4y(O~Rb*(MFy_igM7 z*~yYUyR6yQgzWnWMUgDov!!g=lInM+=lOmOk4L`O?{i&qxy&D*_qorRbDwj6?)!ef z#JLd7F6Z2I$S0iYI={rZNk*<{HtIl^mx=h>Cim*04K4+Z4IJtd*-)%6XV2(MCscPiw_a+y*?BKbTS@BZ3AUao^%Zi#PhoY9Vib4N>SE%4>=Jco0v zH_Miey{E;FkdlZSq)e<{`+S3W=*ttvD#hB8w=|2aV*D=yOV}(&p%0LbEWH$&@$X3x~CiF-?ejQ*N+-M zc8zT@3iwkdRT2t(XS`d7`tJQAjRmKAhiw{WOqpuvFp`i@Q@!KMhwKgsA}%@sw8Xo5Y=F zhRJZg)O4uqNWj?V&&vth*H#je6T}}p_<>!Dr#89q@uSjWv~JuW(>FqoJ5^ho0%K?E z9?x_Q;kmcsQ@5=}z@tdljMSt9-Z3xn$k)kEjK|qXS>EfuDmu(Z8|(W?gY6-l z@R_#M8=vxKMAoi&PwnaIYw2COJM@atcgfr=zK1bvjW?9B`-+Voe$Q+H$j!1$Tjn+* z&LY<%)L@;zhnJlB^Og6I&BOR-m?{IW;tyYC%FZ!&Z>kGjHJ6cqM-F z&19n+e1=9AH1VrVeHrIzqlC`w9=*zfmrerF?JMzO&|Mmv;!4DKc(sp+jy^Dx?(8>1 zH&yS_4yL7m&GWX~mdfgH*AB4{CKo;+egw=PrvkTaoBU+P-4u?E|&!c z)DKc;>$$B6u*Zr1SjUh2)FeuWLWHl5TH(UHWkf zLs>7px!c5n;rbe^lO@qlYLzlDVp(z?6rPZel=YB)Uv&n!2{+Mb$-vQl=xKw( zve&>xYx+jW_NJh!FV||r?;hdP*jOXYcLCp>DOtJ?2S^)DkM{{Eb zS$!L$e_o0(^}n3tA1R3-$SNvgBq;DOEo}fNc|tB%%#g4RA3{|euq)p+xd3I8^4E&m zFrD%}nvG^HUAIKe9_{tXB;tl|G<%>yk6R;8L2)KUJw4yHJXUOPM>(-+jxq4R;z8H#>rnJy*)8N+$wA$^F zN+H*3t)eFEgxLw+Nw3};4WV$qj&_D`%ADV2%r zJCPCo%{=z7;`F98(us5JnT(G@sKTZ^;2FVitXyLe-S5(hV&Ium+1pIUB(CZ#h|g)u zSLJJ<@HgrDiA-}V_6B^x1>c9B6%~847JkQ!^KLZ2skm;q*edo;UA)~?SghG8;QbHh z_6M;ouo_1rq9=x$<`Y@EA{C%6-pEV}B(1#sDoe_e1s3^Y>n#1Sw;N|}8D|s|VPd+g z-_$QhCz`vLxxrVMx3ape1xu3*wjx=yKSlM~nFgkNWb4?DDr*!?U)L_VeffF<+!j|b zZ$Wn2$TDv3C3V@BHpSgv3JUif8%hk%OsGZ=OxH@8&4`bbf$`aAMchl^qN>Eyu3JH} z9-S!x8-s4fE=lad%Pkp8hAs~u?|uRnL48O|;*DEU! 
zuS0{cpk%1E0nc__2%;apFsTm0bKtd&A0~S3Cj^?72-*Owk3V!ZG*PswDfS~}2<8le z5+W^`Y(&R)yVF*tU_s!XMcJS`;(Tr`J0%>p=Z&InR%D3@KEzzI+-2)HK zuoNZ&o=wUC&+*?ofPb0a(E6(<2Amd6%uSu_^-<1?hsxs~0K5^f(LsGqgEF^+0_H=uNk9S0bb!|O8d?m5gQjUKevPaO+*VfSn^2892K~%crWM8+6 z25@V?Y@J<9w%@NXh-2!}SK_(X)O4AM1-WTg>sj1{lj5@=q&dxE^9xng1_z9w9DK>| z6Iybcd0e zyi;Ew!KBRIfGPGytQ6}z}MeXCfLY0?9%RiyagSp_D1?N&c{ zyo>VbJ4Gy`@Fv+5cKgUgs~na$>BV{*em7PU3%lloy_aEovR+J7TfQKh8BJXyL6|P8un-Jnq(ghd!_HEOh$zlv2$~y3krgeH;9zC}V3f`uDtW(%mT#944DQa~^8ZI+zAUu4U(j0YcDfKR$bK#gvn_{JZ>|gZ5+)u?T$w7Q%F^;!Wk?G z(le7r!ufT*cxS}PR6hIVtXa)i`d$-_1KkyBU>qmgz-=T};uxx&sKgv48akIWQ89F{ z0XiY?WM^~;|T8zBOr zs#zuOONzH?svv*jokd5SK8wG>+yMC)LYL|vLqm^PMHcT=`}V$=nIRHe2?h)8WQa6O zPAU}d`1y(>kZiP~Gr=mtJLMu`i<2CspL|q2DqAgAD^7*$xzM`PU4^ga`ilE134XBQ z99P(LhHU@7qvl9Yzg$M`+dlS=x^(m-_3t|h>S}E0bcFMn=C|KamQ)=w2^e)35p`zY zRV8X?d;s^>Cof2SPR&nP3E+-LCkS0J$H!eh8~k0qo$}00b=7!H_I2O+Ro@3O$nPdm ztmbOO^B+IHzQ5w>@@@J4cKw5&^_w6s!s=H%&byAbUtczPQ7}wfTqxxtQNfn*u73Qw zGuWsrky_ajPx-5`R<)6xHf>C(oqGf_Fw|-U*GfS?xLML$kv;h_pZ@Kk$y0X(S+K80 z6^|z)*`5VUkawg}=z`S;VhZhxyDfrE0$(PMurAxl~<>lfZa>JZ288ULK7D` zl9|#L^JL}Y$j*j`0-K6kH#?bRmg#5L3iB4Z)%iF@SqT+Lp|{i`m%R-|ZE94Np7Pa5 zCqC^V3}B(FR340pmF*qaa}M}+h6}mqE~7Sh!9bDv9YRT|>vBNAqv09zXHMlcuhKD| zcjjA(b*XCIwJ33?CB!+;{)vX@9xns_b-VO{i0y?}{!sdXj1GM8+$#v>W7nw;+O_9B z_{4L;C6ol?(?W0<6taGEn1^uG=?Q3i29sE`RfYCaV$3DKc_;?HsL?D_fSYg}SuO5U zOB_f4^vZ_x%o`5|C@9C5+o=mFy@au{s)sKw!UgC&L35aH(sgDxRE2De%(%OT=VUdN ziVLEmdOvJ&5*tCMKRyXctCwQu_RH%;m*$YK&m;jtbdH#Ak~13T1^f89tn`A%QEHWs~jnY~E}p_Z$XC z=?YXLCkzVSK+Id`xZYTegb@W8_baLt-Fq`Tv|=)JPbFsKRm)4UW;yT+J`<)%#ue9DPOkje)YF2fsCilK9MIIK>p*`fkoD5nGfmLwt)!KOT+> zOFq*VZktDDyM3P5UOg`~XL#cbzC}eL%qMB=Q5$d89MKuN#$6|4gx_Jt0Gfn8w&q}%lq4QU%6#jT*MRT% zrLz~C8FYKHawn-EQWN1B75O&quS+Z81(zN)G>~vN8VwC+e+y(`>HcxC{MrJ;H1Z4k zZWuv$w_F0-Ub%MVcpIc){4PGL^I7M{>;hS?;eH!;gmcOE66z3;Z1Phqo(t zVP(Hg6q#0gIKgsg7L7WE!{Y#1nI(45tx2{$34dDd#!Z0NIyrm)HOn5W#7;f4pQci# zDW!FI(g4e668kI9{2+mLwB+=#9bfqgX%!B34V-$wwSN(_cm*^{y0jQtv*4}eO^sOV z*9xoNvX)c9isB}Tgx&ZRjp3kwhTVK?r9;n!x>^XYT z@Q^7zp{rkIs{2mUSE^2!Gf6$6;j~&4=-0cSJJDizZp6LTe8b45;{AKM%v99}{{FfC zz709%u0mC=1KXTo(=TqmZQ;c?$M3z(!xah>aywrj40sc2y3rKFw4jCq+Y+u=CH@_V zxz|qeTwa>+<|H%8Dz5u>ZI5MmjTFwXS-Fv!TDd*`>3{krWoNVx$<133`(ftS?ZPyY z&4@ah^3^i`vL$BZa>O|Nt?ucewzsF)0zX3qmM^|waXr=T0pfIb0*$AwU=?Ipl|1Y; z*Pk6{C-p4MY;j@IJ|DW>QHZQJcp;Z~?8(Q+Kk3^0qJ}SCk^*n4W zu9ZFwLHUx-$6xvaQ)SUQcYd6fF8&x)V`1bIuX@>{mE$b|Yd(qomn3;bPwnDUc0F=; zh*6_((%bqAYQWQ~odER?h>1mkL4kpb3s7`0m@rDKGU*oyF)$j~Ffd4fXV$?`f~rHf zB%Y)@5SXZvfwm10RY5X?TEo)PK_`L6qgBp=#>fO49$D zDq8Ozj0q6213tV5Qq=;fZ0$|KroY{Dz=l@lU^J)?Ko@ti20TRplXzphBi>XGx4bou zEWrkNjz0t5j!_ke{g5I#PUlEU$Km8g8TE|XK=MkU@PT4T><2OVamoK;wJ}3X0L$vX zgd7gNa359*nc)R-0!`2X@FOTB`+oETOPc=ubp5R)VQgY+5BTZZJ2?9QwnO=dnulIUF3gFn;BODC2)65)HeVd%t86sL7Rv^Y+nbn+&l z6BAJY(ETvwI)Ts$aiE8rht4KD*qNyE{8{x6R|%akbTBzw;2+6Echkt+W+`u^XX z_z&x%n@00IL3?^hro$gg*4VI_WAaTyVM5Foj~O|-84 z$;06hMwt*rV;^8iB z1~&0XWpYJmG?Ts^K9PC62H*`G}xom%S%yq|xvG~FIfP=9*f zZoDRJBm*Y0aId=qJ?7dyb)6)JGWGwe)MHeNSzhi)Ko6J<-m@v=a%NsP537lHe0R* z`If4$aaBA#S=w!2z&m>{lpTy^Lm^mg*3?M&7HFv}7K6x*cukLIGX;bQG|QWdn{%_6 zHnwBKr84#B7Z+AnBXa16a?or^R?+>$4`}{*a_>IhbjvyTtWkHw)|ay)ahWUd-qq$~ zMbh6roVsj;_qnC-R{G+Cy6bApVOinSU-;(DxUEl!i2)1EeQ9`hrfqj(nKI7?Z>Xur zoJz-a`PxkYit1HEbv|jy%~DO^13J-ut986EEG=66S}D3!L}Efp;Bez~7tNq{QsUMm zh9~(HYg1pA*=37C0}n4g&bFbQ+?-h-W}onYeE{q;cIy%eZK9wZjSwGvT+&Cgv z?~{9p(;bY_1+k|wkt_|N!@J~aoY@|U_RGoWX<;p{Nu*D*&_phw`8jYkMNpRTWx1H* z>J-Mi_!`M468#5Aix$$u1M@rJEIOc?k^QBc?T(#=n&*5eS#u*Y)?L8Ha$9wRWdH^3D4|Ps)Y?m0q~SiKiSfEkJ!=^`lJ(%W3o|CZ 
zSrZL-Xxc{OrmsQD&s~zPfNJOpSZUl%V8tdG%ei}lQkM+z@-4etFPR>GOH9+Y_F<3=~SXln9Kb-o~f>2a6Xz@AS3cn^;c_>lUwlK(n>z?A>NbC z`Ud8^aQy>wy=$)w;JZzA)_*Y$Z5hU=KAG&htLw1Uh00yE!|Nu{EZkch zY9O6x7Y??>!7pUNME*d!=R#s)ghr|R#41l!c?~=3CS8&zr6*aA7n9*)*PWBV2w+&I zpW1-9fr3j{VTcls1>ua}F*bbju_Xq%^v;-W~paSqlf zolj*dt`BBjHI)H9{zrkBo=B%>8}4jeBO~kWqO!~Thi!I1H(in=n^fS%nuL=X2+s!p}HfTU#NBGiwEBF^^tKU zbhhv+0dE-sbK$>J#t-J!B$TMgN@Wh5wTtK2BG}4BGfsZOoRUS#G8Cxv|6EI*n&Xxq zt{&OxCC+BNqz$9b0WM7_PyBJEVObHFh%%`~!@MNZlo*oXDCwDcFwT~Rls!aApL<)^ zbBftGKKBRhB!{?fX@l2_y~%ygNFfF(XJzHh#?`WlSL{1lKT*gJM zs>bd^H9NCxqxn(IOky5k-wALFowQr(gw%|`0991u#9jXQh?4l|l>pd6a&rx|v=fPJ z1mutj{YzpJ_gsClbWFk(G}bSlFi-6@mwoQh-XeD*j@~huW4(8ub%^I|azA)h2t#yG z7e_V_<4jlM3D(I+qX}yEtqj)cpzN*oCdYHa!nm%0t^wHm)EmFP*|FMw!tb@&`G-u~ zK)=Sf6z+BiTAI}}i{*_Ac$ffr*Wrv$F7_0gJkjx;@)XjYSh`RjAgrCck`x!zP>Ifu z&%he4P|S)H*(9oB4uvH67^0}I-_ye_!w)u3v2+EY>eD3#8QR24<;7?*hj8k~rS)~7 zSXs5ww)T(0eHSp$hEIBnW|Iun<_i`}VE0Nc$|-R}wlSIs5pV{g_Dar(Zz<4X3`W?K z6&CAIl4U(Qk-tTcK{|zYF6QG5ArrEB!;5s?tW7 zrE3hcFY&k)+)e{+YOJ0X2uDE_hd2{|m_dC}kgEKqiE9Q^A-+>2UonB+L@v3$9?AYw zVQv?X*pK;X4Ovc6Ev5Gbg{{Eu*7{N3#0@9oMI~}KnObQE#Y{&3mM4`w%wN+xrKYgD zB-ay0Q}m{QI;iY`s1Z^NqIkjrTlf`B)B#MajZ#9u41oRBC1oM1vq0i|F59> z#StM@bHt|#`2)cpl_rWB($DNJ3Lap}QM-+A$3pe}NyP(@+i1>o^fe-oxX#Bt`mcQc zb?pD4W%#ep|3%CHAYnr*^M6Czg>~L4?l16H1OozM{P*en298b+`i4$|w$|4AHbzqB zHpYUsHZET$Z0ztC;U+0*+amF!@PI%^oUIZy{`L{%O^i{Xk}X0&nl)n~tVEpcAJSJ} zverw15zP1P-O8h9nd!&hj$zuwjg?DoxYIw{jWM zW5_pj+wFy8Tsa9g<7Qa21WaV&;ejoYflRKcz?#fSH_)@*QVlN2l4(QNk| z4aPnv&mrS&0|6NHq05XQw$J^RR9T{3SOcMKCXIR1iSf+xJ0E_Wv?jEc*I#ZPzyJN2 zUG0UOXHl+PikM*&g$U@g+KbG-RY>uaIl&DEtw_Q=FYq?etc!;hEC_}UX{eyh%dw2V zTTSlap&5>PY{6I#(6`j-9`D&I#|YPP8a;(sOzgeKDWsLa!i-$frD>zr-oid!Hf&yS z!i^cr&7tN}OOGmX2)`8k?Tn!!4=tz~3hCTq_9CdiV!NIblUDxHh(FJ$zs)B2(t5@u z-`^RA1ShrLCkg0)OhfoM;4Z{&oZmAec$qV@ zGQ(7(!CBk<5;Ar%DLJ0p0!ResC#U<+3i<|vib1?{5gCebG7$F7URKZXuX-2WgF>YJ^i zMhHDBsh9PDU8dlZ$yJKtc6JA#y!y$57%sE>4Nt+wF1lfNIWyA`=hF=9Gj%sRwi@vd z%2eVV3y&dvAgyuJ=eNJR+*080dbO_t@BFJO<@&#yqTK&+xc|FRR;p;KVk@J3$S{p` zGaMj6isho#%m)?pOG^G0mzOAw0z?!AEMsv=0T>WWcE>??WS=fII$t$(^PDPMU(P>o z_*0s^W#|x)%tx8jIgZY~A2yG;US0m2ZOQt6yJqW@XNY_>_R7(Nxb8Ged6BdYW6{prd!|zuX$@Q2o6Ona8zzYC1u!+2!Y$Jc9a;wy+pXt}o6~Bu1oF1c zp7Y|SBTNi@=I(K%A60PMjM#sfH$y*c{xUgeSpi#HB`?|`!Tb&-qJ3;vxS!TIzuTZs-&%#bAkAyw9m4PJgvey zM5?up*b}eDEY+#@tKec)-c(#QF0P?MRlD1+7%Yk*jW;)`f;0a-ZJ6CQA?E%>i2Dt7T9?s|9ZF|KP4;CNWvaVKZ+Qeut;Jith_y{v*Ny6Co6!8MZx;Wgo z=qAi%&S;8J{iyD&>3CLCQdTX*$+Rx1AwA*D_J^0>suTgBMBb=*hefV+Ars#mmr+YsI3#!F@Xc1t4F-gB@6aoyT+5O(qMz*zG<9Qq*f0w^V!03rpr*-WLH}; zfM{xSPJeu6D(%8HU%0GEa%waFHE$G?FH^kMS-&I3)ycx|iv{T6Wx}9$$D&6{%1N_8 z_CLw)_9+O4&u94##vI9b-HHm_95m)fa??q07`DniVjAy`t7;)4NpeyAY(aAk(+T_O z1om+b5K2g_B&b2DCTK<>SE$Ode1DopAi)xaJjU>**AJK3hZrnhEQ9E`2=|HHe<^tv z63e(bn#fMWuz>4erc47}!J>U58%<&N<6AOAewyzNTqi7hJc|X{782&cM zHZYclNbBwU6673=!ClmxMfkC$(CykGR@10F!zN1Se83LR&a~$Ht&>~43OX22mt7tcZUpa;9@q}KDX3O&Ugp6< zLZLfIMO5;pTee1vNyVC$FGxzK2f>0Z-6hM82zKg44nWo|n}$Zk6&;5ry3`(JFEX$q zK&KivAe${e^5ZGc3a9hOt|!UOE&OocpVryE$Y4sPcs4rJ>>Kbi2_subQ9($2VN(3o zb~tEzMsHaBmBtaHAyES+d3A(qURgiskSSwUc9CfJ@99&MKp2sooSYZu+-0t0+L*!I zYagjOlPgx|lep9tiU%ts&McF6b0VE57%E0Ho%2oi?=Ks+5%aj#au^OBwNwhec zta6QAeQI^V!dF1C)>RHAmB`HnxyqWx?td@4sd15zPd*Fc9hpDXP23kbBenBxGeD$k z;%0VBQEJ-C)&dTAw_yW@k0u?IUk*NrkJ)(XEeI z9Y>6Vel>#s_v@=@0<{4A{pl=9cQ&Iah0iD0H`q)7NeCIRz8zx;! z^OO;1+IqoQNak&pV`qKW+K0^Hqp!~gSohcyS)?^P`JNZXw@gc6{A3OLZ?@1Uc^I2v z+X!^R*HCm3{7JPq{8*Tn>5;B|X7n4QQ0Bs79uTU%nbqOJh`nX(BVj!#f;#J+WZxx4 z_yM&1Y`2XzhfqkIMO7tB3raJKQS+H5F%o83bM+hxbQ zeeJm=Dvix$2j|b4?mDacb67v-1^lTp${z=jc1=j~QD>7c*@+1?py>%Kj%Ejp7Y-!? 
z8iYRUlGVrQPandAaxFfks53@2EC#0)%mrnmGRn&>=$H$S8q|kE_iWko4`^vCS2aWg z#!`RHUGyOt*k?bBYu3*j3u0gB#v(3tsije zgIuNNWNtrOkx@Pzs;A9un+2LX!zw+p3_NX^Sh09HZAf>m8l@O*rXy_82aWT$Q>iyy zqO7Of)D=wcSn!0+467&!Hl))eff=$aneB?R!YykdKW@k^_uR!+Q1tR)+IJb`-6=jj zymzA>Sv4>Z&g&WWu#|~GcP7qP&m*w-S$)7Xr;(duqCTe7p8H3k5>Y-n8438+%^9~K z3r^LIT_K{i7DgEJjIocw_6d0!<;wKT`X;&vv+&msmhAAnIe!OTdybPctzcEzBy88_ zWO{6i4YT%e4^WQZB)KHCvA(0tS zHu_Bg+6Ko%a9~$EjRB90`P(2~6uI@SFibxct{H#o&y40MdiXblu@VFXbhz>Nko;7R z70Ntmm-FePqhb%9gL+7U8@(ch|JfH5Fm)5${8|`Lef>LttM_iww6LW2X61ldBmG0z zax3y)njFe>j*T{i0s8D4=L>X^j0)({R5lMGVS#7(2C9@AxL&C-lZQx~czI7Iv+{%1 z2hEG>RzX4S8x3v#9sgGAnPzptM)g&LB}@%E>fy0vGSa(&q0ch|=ncKjNrK z`jA~jObJhrJ^ri|-)J^HUyeZXz~XkBp$VhcTEcTdc#a2EUOGVX?@mYx#Vy*!qO$Jv zQ4rgOJ~M*o-_Wptam=~krnmG*p^j!JAqoQ%+YsDFW7Cc9M%YPiBOrVcD^RY>m9Pd< zu}#9M?K{+;UIO!D9qOpq9yxUquQRmQNMo0pT`@$pVt=rMvyX)ph(-CCJLvUJy71DI zBk7oc7)-%ngdj~s@76Yse3L^gV0 z2==qfp&Q~L(+%RHP0n}+xH#k(hPRx(!AdBM$JCfJ5*C=K3ts>P?@@SZ_+{U2qFZb>4kZ{Go37{# zSQc+-dq*a-Vy4?taS&{Ht|MLRiS)Sn14JOONyXqPNnpq&2y~)6wEG0oNy>qvod$FF z`9o&?&6uZjhZ4_*5qWVrEfu(>_n2Xi2{@Gz9MZ8!YmjYvIMasE9yVQL10NBrTCczq zcTY1q^PF2l!Eraguf{+PtHV3=2A?Cu&NN&a8V(y;q(^_mFc6)%Yfn&X&~Pq zU1?qCj^LF(EQB1F`8NxNjyV%fde}dEa(Hx=r7$~ts2dzDwyi6ByBAIx$NllB4%K=O z$AHz1<2bTUb>(MCVPpK(E9wlLElo(aSd(Os)^Raum`d(g9Vd_+Bf&V;l=@mM=cC>) z)9b0enb)u_7V!!E_bl>u5nf&Rl|2r=2F3rHMdb7y9E}}F82^$Rf+P8%dKnOeKh1vs zhH^P*4Ydr^$)$h@4KVzxrHyy#cKmWEa9P5DJ|- zG;!Qi35Tp7XNj60=$!S6U#!(${6hyh7d4q=pF{`0t|N^|L^d8pD{O9@tF~W;#Je*P z&ah%W!KOIN;SyAEhAeTafJ4uEL`(RtnovM+cb(O#>xQnk?dzAjG^~4$dFn^<@-Na3 z395;wBnS{t*H;Jef2eE!2}u5Ns{AHj>WYZDgQJt8v%x?9{MXqJsGP|l%OiZqQ1aB! z%E=*Ig`(!tHh>}4_z5IMpg{49UvD*Pp9!pxt_gdAW%sIf3k6CTycOT1McPl=_#0?8 zVjz8Hj*Vy9c5-krd-{BQ{6Xy|P$6LJvMuX$* zA+@I_66_ET5l2&gk9n4$1M3LN8(yEViRx&mtd#LD}AqEs?RW=xKC(OCWH;~>(X6h!uDxXIPH06xh z*`F4cVlbDP`A)-fzf>MuScYsmq&1LUMGaQ3bRm6i7OsJ|%uhTDT zlvZA1M}nz*SalJWNT|`dBm1$xlaA>CCiQ zK`xD-RuEn>-`Z?M{1%@wewf#8?F|(@1e0+T4>nmlSRrNK5f)BJ2H*$q(H>zGD0>eL zQ!tl_Wk)k*e6v^m*{~A;@6+JGeWU-q9>?+L_#UNT%G?4&BnOgvm9@o7l?ov~XL+et zbGT)|G7)KAeqb=wHSPk+J1bdg7N3$vp(ekjI1D9V$G5Cj!=R2w=3*4!z*J-r-cyeb zd(i2KmX!|Lhey!snRw z?#$Gu%S^SQEKt&kep)up#j&9}e+3=JJBS(s>MH+|=R(`8xK{mmndWo_r`-w1#SeRD&YtAJ#GiVI*TkQZ}&aq<+bU2+coU3!jCI6E+Ad_xFW*ghnZ$q zAoF*i&3n1j#?B8x;kjSJD${1jdRB;)R*)Ao!9bd|C7{;iqDo|T&>KSh6*hCD!rwv= zyK#F@2+cv3=|S1Kef(E6Niv8kyLVLX&e=U;{0x{$tDfShqkjUME>f8d(5nzSkY6@! 
z^-0>DM)wa&%m#UF1F?zR`8Y3X#tA!*7Q$P3lZJ%*KNlrk_uaPkxw~ zxZ1qlE;Zo;nb@!SMazSjM>;34ROOoygo%SF);LL>rRonWwR>bmSd1XD^~sGSu$Gg# zFZ`|yKU0%!v07dz^v(tY%;So(e`o{ZYTX`hm;@b0%8|H>VW`*cr8R%3n|ehw2`(9B+V72`>SY}9^8oh$En80mZK9T4abVG*to;E z1_S6bgDOW?!Oy1LwYy=w3q~KKdbNtyH#d24PFjX)KYMY93{3-mPP-H>@M-_>N~DDu zENh~reh?JBAK=TFN-SfDfT^=+{w4ea2KNWXq2Y<;?(gf(FgVp8Zp-oEjKzB%2Iqj;48GmY3h=bcdYJ}~&4tS`Q1sb=^emaW$IC$|R+r-8V- zf0$gGE(CS_n4s>oicVk)MfvVg#I>iDvf~Ov8bk}sSxluG!6#^Z_zhB&U^`eIi1@j( z^CK$z^stBHtaDDHxn+R;3u+>Lil^}fj?7eaGB z&5nl^STqcaBxI@v>%zG|j))G(rVa4aY=B@^2{TFkW~YP!8!9TG#(-nOf^^X-%m9{Z zCC?iC`G-^RcBSCuk=Z`(FaUUe?hf3{0C>>$?Vs z`2Uud9M+T&KB6o4o9kvdi^Q=Bw!asPdxbe#W-Oaa#_NP(qpyF@bVxv5D5))srkU#m zj_KA+#7sqDn*Ipf!F5Byco4HOSd!Ui$l94|IbW%Ny(s1>f4|Mv^#NfB31N~kya9!k zWCGL-$0ZQztBate^fd>R!hXY_N9ZjYp3V~4_V z#eB)Kjr8yW=+oG)BuNdZG?jaZlw+l_ma8aET(s+-x+=F-t#Qoiuu1i`^x8Sj>b^U} zs^z<()YMFP7CmjUC@M=&lA5W7t&cxTlzJAts*%PBDAPuqcV5o7HEnqjif_7xGt)F% zGx2b4w{@!tE)$p=l3&?Bf#`+!-RLOleeRk3 z7#pF|w@6_sBmn1nECqdunmG^}pr5(ZJQVvAt$6p3H(16~;vO>?sTE`Y+mq5YP&PBo zvq!7#W$Gewy`;%6o^!Dtjz~x)T}Bdk*BS#=EY=ODD&B=V6TD2z^hj1m5^d6s)D*wk zu$z~D7QuZ2b?5`p)E8e2_L38v3WE{V`bVk;6fl#o2`) z99JsWhh?$oVRn@$S#)uK&8DL8>An0&S<%V8hnGD7Z^;Y(%6;^9!7kDQ5bjR_V+~wp zfx4m3z6CWmmZ<8gDGUyg3>t8wgJ5NkkiEm^(sedCicP^&3D%}6LtIUq>mXCAt{9eF zNXL$kGcoUTf_Lhm`t;hD-SE)m=iBnxRU(NyL}f6~1uH)`K!hmYZjLI%H}AmEF5RZt z06$wn63GHnApHXZZJ}s^s)j9(BM6e*7IBK6Bq(!)d~zR#rbxK9NVIlgquoMq z=eGZ9NR!SEqP6=9UQg#@!rtbbSBUM#ynF);zKX+|!Zm}*{H z+j=d?aZ2!?@EL7C~%B?6ouCKLnO$uWn;Y6Xz zX8dSwj732u(o*U3F$F=7xwxm>E-B+SVZH;O-4XPuPkLSt_?S0)lb7EEg)Mglk0#eS z9@jl(OnH4juMxY+*r03VDfPx_IM!Lmc(5hOI;`?d37f>jPP$?9jQQIQU@i4vuG6MagEoJrQ=RD7xt@8E;c zeGV*+Pt+t$@pt!|McETOE$9k=_C!70uhwRS9X#b%ZK z%q(TIUXSS^F0`4Cx?Rk07C6wI4!UVPeI~-fxY6`YH$kABdOuiRtl73MqG|~AzZ@iL&^s?24iS;RK_pdlWkhcF z@Wv-Om(Aealfg)D^adlXh9Nvf~Uf@y;g3Y)i(YP zEXDnb1V}1pJT5ZWyw=1i+0fni9yINurD=EqH^ciOwLUGi)C%Da)tyt=zq2P7pV5-G zR7!oq28-Fgn5pW|nlu^b!S1Z#r7!Wtr{5J5PQ>pd+2P7RSD?>(U7-|Y z7ZQ5lhYIl_IF<9?T9^IPK<(Hp;l5bl5tF9>X-zG14_7PfsA>6<$~A338iYRT{a@r_ zuXBaT=`T5x3=s&3=RYx6NgG>No4?5KFBVjE(swfcivcIpPQFx5l+O;fiGsOrl5teR z_Cm+;PW}O0Dwe_(4Z@XZ)O0W-v2X><&L*<~*q3dg;bQW3g7)a#3KiQP>+qj|qo*Hk z?57>f2?f@`=Fj^nkDKeRkN2d$Z@2eNKpHo}ksj-$`QKb6n?*$^*%Fb3_Kbf1(*W9K>{L$mud2WHJ=j0^=g30Xhg8$#g^?36`p1fm;;1@0Lrx+8t`?vN0ZorM zSW?rhjCE8$C|@p^sXdx z|NOHHg+fL;HIlqyLp~SSdIF`TnSHehNCU9t89yr@)FY<~hu+X`tjg(aSVae$wDG*C zq$nY(Y494R)hD!i1|IIyP*&PD_c2FPgeY)&mX1qujB1VHPG9`yFQpLFVQ0>EKS@Bp zAfP5`C(sWGLI?AC{XEjLKR4FVNw(4+9b?kba95ukgR1H?w<8F7)G+6&(zUhIE5Ef% z=fFkL3QKA~M@h{nzjRq!Y_t!%U66#L8!(2-GgFxkD1=JRRqk=n%G(yHKn%^&$dW>; zSjAcjETMz1%205se$iH_)ZCpfg_LwvnsZQAUCS#^FExp8O4CrJb6>JquNV@qPq~3A zZ<6dOU#6|8+fcgiA#~MDmcpIEaUO02L5#T$HV0$EMD94HT_eXLZ2Zi&(! z&5E>%&|FZ`)CN10tM%tLSPD*~r#--K(H-CZqIOb99_;m|D5wdgJ<1iOJz@h2Zkq?} z%8_KXb&hf=2Wza(Wgc;3v3TN*;HTU*q2?#z&tLn_U0Nt!y>Oo>+2T)He6%XuP;fgn z-G!#h$Y2`9>Jtf}hbVrm6D70|ERzLAU>3zoWhJmjWfgM^))T+2u$~5>HF9jQDkrXR z=IzX36)V75PrFjkQ%TO+iqKGCQ-DDXbaE;C#}!-CoWQx&v*vHfyI>$HNRbpvm<`O( zlx9NBWD6_e&J%Ous4yp~s6)Ghni!I6)0W;9(9$y1wWu`$gs<$9Mcf$L*piP zPR0Av*2%ul`W;?-1_-5Zy0~}?`e@Y5A&0H!^ApyVTT}BiOm4GeFo$_oPlDEyeGBbh z1h3q&Dx~GmUS|3@4V36&$2uO8!Yp&^pD7J5&TN{?xphf*-js1fP?B|`>p_K>lh{ij zP(?H%e}AIP?_i^f&Li=FDSQ`2_NWxL+BB=nQr=$ zHojMlXNGauvvwPU>ZLq!`bX-5F4jBJ&So{kE5+ms9UEYD{66!|k~3vsP+mE}x!>%P za98bAU0!h0&ka4EoiDvBM#CP#dRNdXJcb*(%=<(g+M@<)DZ!@v1V>;54En?igcHR2 zhubQMq}VSOK)onqHfczM7YA@s=9*ow;k;8)&?J3@0JiGcP! 
zP#00KZ1t)GyZeRJ=f0^gc+58lc4Qh*S7RqPIC6GugG1gXe$LIQMRCo8cHf^qXgAa2 z`}t>u2Cq1CbSEpLr~E=c7~=Qkc9-vLE%(v9N*&HF`(d~(0`iukl5aQ9u4rUvc8%m) zr2GwZN4!s;{SB87lJB;veebPmqE}tSpT>+`t?<457Q9iV$th%i__Z1kOMAswFldD6 ztbOvO337S5o#ZZgN2G99_AVqPv!?Gmt3pzgD+Hp3QPQ`9qJ(g=kjvD+fUSS3upJn! zqoG7acIKEFRX~S}3|{EWT$kdz#zrDlJU(rPkxjws_iyLKU8+v|*oS_W*-guAb&Pj1 z35Z`3z<&Jb@2Mwz=KXucNYdY#SNO$tcVFr9KdKm|%^e-TXzs6M`PBper%ajkrIyUe zp$vVxVs9*>Vp4_1NC~Zg)WOCPmOxI1V34QlG4!aSFOH{QqSVq1^1)- z0P!Z?tT&E-ll(pwf0?=F=yOzik=@nh1Clxr9}Vij89z)ePDSCYAqw?lVI?v?+&*zH z)p$CScFI8rrwId~`}9YWPFu0cW1Sf@vRELs&cbntRU6QfPK-SO*mqu|u~}8AJ!Q$z znzu}50O=YbjwKCuSVBs6&CZR#0FTu)3{}qJJYX(>QPr4$RqWiwX3NT~;>cLn*_&1H zaKpIW)JVJ>b{uo2oq>oQt3y=zJjb%fU@wLqM{SyaC6x2snMx-}ivfU<1- znu1Lh;i$3Tf$Kh5Uk))G!D1UhE8pvx&nO~w^fG)BC&L!_hQk%^p`Kp@F{cz>80W&T ziOK=Sq3fdRu*V0=S53rcIfWFazI}Twj63CG(jOB;$*b`*#B9uEnBM`hDk*EwSRdwP8?5T?xGUKs=5N83XsR*)a4|ijz|c{4tIU+4j^A5C<#5 z*$c_d=5ml~%pGxw#?*q9N7aRwPux5EyqHVkdJO=5J>84!X6P>DS8PTTz>7C#FO?k#edkntG+fJk8ZMn?pmJSO@`x-QHq;7^h6GEXLXo1TCNhH z8ZDH{*NLAjo3WM`xeb=X{((uv3H(8&r8fJJg_uSs_%hOH%JDD?hu*2NvWGYD+j)&` zz#_1%O1wF^o5ryt?O0n;`lHbzp0wQ?rcbW(F1+h7_EZZ9{>rePvLAPVZ_R|n@;b$;UchU=0j<6k8G9QuQf@76oiE*4 zXOLQ&n3$NR#p4<5NJMVC*S);5x2)eRbaAM%VxWu9ohlT;pGEk7;002enCbQ>2r-us z3#bpXP9g|mE`65VrN`+3mC)M(eMj~~eOf)do<@l+fMiTR)XO}422*1SL{wyY(%oMpBgJagtiDf zz>O6(m;};>Hi=t8o{DVC@YigqS(Qh+ix3Rwa9aliH}a}IlOCW1@?%h_bRbq-W{KHF z%Vo?-j@{Xi@=~Lz5uZP27==UGE15|g^0gzD|3x)SCEXrx`*MP^FDLl%pOi~~Il;dc z^hrwp9sYeT7iZ)-ajKy@{a`kr0-5*_!XfBpXwEcFGJ;%kV$0Nx;apKrur zJN2J~CAv{Zjj%FolyurtW8RaFmpn&zKJWL>(0;;+q(%(Hx!GMW4AcfP0YJ*Vz!F4g z!ZhMyj$BdXL@MlF%KeInmPCt~9&A!;cRw)W!Hi@0DY(GD_f?jeV{=s=cJ6e}JktJw zQORnxxj3mBxfrH=x{`_^Z1ddDh}L#V7i}$njUFRVwOX?qOTKjfPMBO4y(WiU<)epb zvB9L=%jW#*SL|Nd_G?E*_h1^M-$PG6Pc_&QqF0O-FIOpa4)PAEPsyvB)GKasmBoEt z?_Q2~QCYGH+hW31x-B=@5_AN870vY#KB~3a*&{I=f);3Kv7q4Q7s)0)gVYx2#Iz9g(F2;=+Iy4 z6KI^8GJ6D@%tpS^8boU}zpi=+(5GfIR)35PzrbuXeL1Y1N%JK7PG|^2k3qIqHfX;G zQ}~JZ-UWx|60P5?d1e;AHx!_;#PG%d=^X(AR%i`l0jSpYOpXoKFW~7ip7|xvN;2^? zsYC9fanpO7rO=V7+KXqVc;Q5z%Bj})xHVrgoR04sA2 zl~DAwv=!(()DvH*=lyhIlU^hBkA0$e*7&fJpB0|oB7)rqGK#5##2T`@_I^|O2x4GO z;xh6ROcV<9>?e0)MI(y++$-ksV;G;Xe`lh76T#Htuia+(UrIXrf9?

L(tZ$0BqX1>24?V$S+&kLZ`AodQ4_)P#Q3*4xg8}lMV-FLwC*cN$< zt65Rf%7z41u^i=P*qO8>JqXPrinQFapR7qHAtp~&RZ85$>ob|Js;GS^y;S{XnGiBc zGa4IGvDl?x%gY`vNhv8wgZnP#UYI-w*^4YCZnxkF85@ldepk$&$#3EAhrJY0U)lR{F6sM3SONV^+$;Zx8BD&Eku3K zKNLZyBni3)pGzU0;n(X@1fX8wYGKYMpLmCu{N5-}epPDxClPFK#A@02WM3!myN%bkF z|GJ4GZ}3sL{3{qXemy+#Uk{4>Kf8v11;f8I&c76+B&AQ8udd<8gU7+BeWC`akUU~U zgXoxie>MS@rBoyY8O8Tc&8id!w+_ooxcr!1?#rc$-|SBBtH6S?)1e#P#S?jFZ8u-Bs&k`yLqW|{j+%c#A4AQ>+tj$Y z^CZajspu$F%73E68Lw5q7IVREED9r1Ijsg#@DzH>wKseye>hjsk^{n0g?3+gs@7`i zHx+-!sjLx^fS;fY!ERBU+Q zVJ!e0hJH%P)z!y%1^ZyG0>PN@5W~SV%f>}c?$H8r;Sy-ui>aruVTY=bHe}$e zi&Q4&XK!qT7-XjCrDaufT@>ieQ&4G(SShUob0Q>Gznep9fR783jGuUynAqc6$pYX; z7*O@@JW>O6lKIk0G00xsm|=*UVTQBB`u1f=6wGAj%nHK_;Aqmfa!eAykDmi-@u%6~ z;*c!pS1@V8r@IX9j&rW&d*}wpNs96O2Ute>%yt{yv>k!6zfT6pru{F1M3P z2WN1JDYqoTB#(`kE{H676QOoX`cnqHl1Yaru)>8Ky~VU{)r#{&s86Vz5X)v15ULHA zAZDb{99+s~qI6;-dQ5DBjHJP@GYTwn;Dv&9kE<0R!d z8tf1oq$kO`_sV(NHOSbMwr=To4r^X$`sBW4$gWUov|WY?xccQJN}1DOL|GEaD_!@& z15p?Pj+>7d`@LvNIu9*^hPN)pwcv|akvYYq)ks%`G>!+!pW{-iXPZsRp8 z35LR;DhseQKWYSD`%gO&k$Dj6_6q#vjWA}rZcWtQr=Xn*)kJ9kacA=esi*I<)1>w^ zO_+E>QvjP)qiSZg9M|GNeLtO2D7xT6vsj`88sd!94j^AqxFLi}@w9!Y*?nwWARE0P znuI_7A-saQ+%?MFA$gttMV-NAR^#tjl_e{R$N8t2NbOlX373>e7Ox=l=;y#;M7asp zRCz*CLnrm$esvSb5{T<$6CjY zmZ(i{Rs_<#pWW>(HPaaYj`%YqBra=Ey3R21O7vUbzOkJJO?V`4-D*u4$Me0Bx$K(lYo`JO}gnC zx`V}a7m-hLU9Xvb@K2ymioF)vj12<*^oAqRuG_4u%(ah?+go%$kOpfb`T96P+L$4> zQ#S+sA%VbH&mD1k5Ak7^^dZoC>`1L%i>ZXmooA!%GI)b+$D&ziKrb)a=-ds9xk#~& z7)3iem6I|r5+ZrTRe_W861x8JpD`DDIYZNm{$baw+$)X^Jtjnl0xlBgdnNY}x%5za zkQ8E6T<^$sKBPtL4(1zi_Rd(tVth*3Xs!ulflX+70?gb&jRTnI8l+*Aj9{|d%qLZ+ z>~V9Z;)`8-lds*Zgs~z1?Fg?Po7|FDl(Ce<*c^2=lFQ~ahwh6rqSjtM5+$GT>3WZW zj;u~w9xwAhOc<kF}~`CJ68 z?(S5vNJa;kriPlim33{N5`C{9?NWhzsna_~^|K2k4xz1`xcui*LXL-1#Y}Hi9`Oo!zQ>x-kgAX4LrPz63uZ+?uG*84@PKq-KgQlMNRwz=6Yes) zY}>YN+qP}nwr$(CZQFjUOI=-6J$2^XGvC~EZ+vrqWaOXB$k?%Suf5k=4>AveC1aJ! 
ziaW4IS%F$_Babi)kA8Y&u4F7E%99OPtm=vzw$$ zEz#9rvn`Iot_z-r3MtV>k)YvErZ<^Oa${`2>MYYODSr6?QZu+be-~MBjwPGdMvGd!b!elsdi4% z`37W*8+OGulab8YM?`KjJ8e+jM(tqLKSS@=jimq3)Ea2EB%88L8CaM+aG7;27b?5` z4zuUWBr)f)k2o&xg{iZ$IQkJ+SK>lpq4GEacu~eOW4yNFLU!Kgc{w4&D$4ecm0f}~ zTTzquRW@`f0}|IILl`!1P+;69g^upiPA6F{)U8)muWHzexRenBU$E^9X-uIY2%&1w z_=#5*(nmxJ9zF%styBwivi)?#KMG96-H@hD-H_&EZiRNsfk7mjBq{L%!E;Sqn!mVX*}kXhwH6eh;b42eD!*~upVG@ z#smUqz$ICm!Y8wY53gJeS|Iuard0=;k5i5Z_hSIs6tr)R4n*r*rE`>38Pw&lkv{_r!jNN=;#?WbMj|l>cU(9trCq; z%nN~r^y7!kH^GPOf3R}?dDhO=v^3BeP5hF|%4GNQYBSwz;x({21i4OQY->1G=KFyu z&6d`f2tT9Yl_Z8YACZaJ#v#-(gcyeqXMhYGXb=t>)M@fFa8tHp2x;ODX=Ap@a5I=U z0G80^$N0G4=U(>W%mrrThl0DjyQ-_I>+1Tdd_AuB3qpYAqY54upwa3}owa|x5iQ^1 zEf|iTZxKNGRpI>34EwkIQ2zHDEZ=(J@lRaOH>F|2Z%V_t56Km$PUYu^xA5#5Uj4I4RGqHD56xT%H{+P8Ag>e_3pN$4m8n>i%OyJFPNWaEnJ4McUZPa1QmOh?t8~n& z&RulPCors8wUaqMHECG=IhB(-tU2XvHP6#NrLVyKG%Ee*mQ5Ps%wW?mcnriTVRc4J`2YVM>$ixSF2Xi+Wn(RUZnV?mJ?GRdw%lhZ+t&3s7g!~g{%m&i<6 z5{ib-<==DYG93I(yhyv4jp*y3#*WNuDUf6`vTM%c&hiayf(%=x@4$kJ!W4MtYcE#1 zHM?3xw63;L%x3drtd?jot!8u3qeqctceX3m;tWetK+>~q7Be$h>n6riK(5@ujLgRS zvOym)k+VAtyV^mF)$29Y`nw&ijdg~jYpkx%*^ z8dz`C*g=I?;clyi5|!27e2AuSa$&%UyR(J3W!A=ZgHF9OuKA34I-1U~pyD!KuRkjA zbkN!?MfQOeN>DUPBxoy5IX}@vw`EEB->q!)8fRl_mqUVuRu|C@KD-;yl=yKc=ZT0% zB$fMwcC|HE*0f8+PVlWHi>M`zfsA(NQFET?LrM^pPcw`cK+Mo0%8*x8@65=CS_^$cG{GZQ#xv($7J z??R$P)nPLodI;P!IC3eEYEHh7TV@opr#*)6A-;EU2XuogHvC;;k1aI8asq7ovoP!* z?x%UoPrZjj<&&aWpsbr>J$Er-7!E(BmOyEv!-mbGQGeJm-U2J>74>o5x`1l;)+P&~ z>}f^=Rx(ZQ2bm+YE0u=ZYrAV@apyt=v1wb?R@`i_g64YyAwcOUl=C!i>=Lzb$`tjv zOO-P#A+)t-JbbotGMT}arNhJmmGl-lyUpMn=2UacVZxmiG!s!6H39@~&uVokS zG=5qWhfW-WOI9g4!R$n7!|ViL!|v3G?GN6HR0Pt_L5*>D#FEj5wM1DScz4Jv@Sxnl zB@MPPmdI{(2D?;*wd>3#tjAirmUnQoZrVv`xM3hARuJksF(Q)wd4P$88fGYOT1p6U z`AHSN!`St}}UMBT9o7i|G`r$ zrB=s$qV3d6$W9@?L!pl0lf%)xs%1ko^=QY$ty-57=55PvP(^6E7cc zGJ*>m2=;fOj?F~yBf@K@9qwX0hA803Xw+b0m}+#a(>RyR8}*Y<4b+kpp|OS+!whP( zH`v{%s>jsQI9rd$*vm)EkwOm#W_-rLTHcZRek)>AtF+~<(did)*oR1|&~1|e36d-d zgtm5cv1O0oqgWC%Et@P4Vhm}Ndl(Y#C^MD03g#PH-TFy+7!Osv1z^UWS9@%JhswEq~6kSr2DITo59+; ze=ZC}i2Q?CJ~Iyu?vn|=9iKV>4j8KbxhE4&!@SQ^dVa-gK@YfS9xT(0kpW*EDjYUkoj! 
zE49{7H&E}k%5(>sM4uGY)Q*&3>{aitqdNnRJkbOmD5Mp5rv-hxzOn80QsG=HJ_atI-EaP69cacR)Uvh{G5dTpYG7d zbtmRMq@Sexey)||UpnZ?;g_KMZq4IDCy5}@u!5&B^-=6yyY{}e4Hh3ee!ZWtL*s?G zxG(A!<9o!CL+q?u_utltPMk+hn?N2@?}xU0KlYg?Jco{Yf@|mSGC<(Zj^yHCvhmyx z?OxOYoxbptDK()tsJ42VzXdINAMWL$0Gcw?G(g8TMB)Khw_|v9`_ql#pRd2i*?CZl z7k1b!jQB=9-V@h%;Cnl7EKi;Y^&NhU0mWEcj8B|3L30Ku#-9389Q+(Yet0r$F=+3p z6AKOMAIi|OHyzlHZtOm73}|ntKtFaXF2Fy|M!gOh^L4^62kGUoWS1i{9gsds_GWBc zLw|TaLP64z3z9?=R2|T6Xh2W4_F*$cq>MtXMOy&=IPIJ`;!Tw?PqvI2b*U1)25^<2 zU_ZPoxg_V0tngA0J+mm?3;OYw{i2Zb4x}NedZug!>EoN3DC{1i)Z{Z4m*(y{ov2%- zk(w>+scOO}MN!exSc`TN)!B=NUX`zThWO~M*ohqq;J2hx9h9}|s#?@eR!=F{QTrq~ zTcY|>azkCe$|Q0XFUdpFT=lTcyW##i;-e{}ORB4D?t@SfqGo_cS z->?^rh$<&n9DL!CF+h?LMZRi)qju!meugvxX*&jfD!^1XB3?E?HnwHP8$;uX{Rvp# zh|)hM>XDv$ZGg=$1{+_bA~u-vXqlw6NH=nkpyWE0u}LQjF-3NhATL@9rRxMnpO%f7 z)EhZf{PF|mKIMFxnC?*78(}{Y)}iztV12}_OXffJ;ta!fcFIVjdchyHxH=t%ci`Xd zX2AUB?%?poD6Zv*&BA!6c5S#|xn~DK01#XvjT!w!;&`lDXSJT4_j$}!qSPrb37vc{ z9^NfC%QvPu@vlxaZ;mIbn-VHA6miwi8qJ~V;pTZkKqqOii<1Cs}0i?uUIss;hM4dKq^1O35y?Yp=l4i zf{M!@QHH~rJ&X~8uATV><23zZUbs-J^3}$IvV_ANLS08>k`Td7aU_S1sLsfi*C-m1 z-e#S%UGs4E!;CeBT@9}aaI)qR-6NU@kvS#0r`g&UWg?fC7|b^_HyCE!8}nyh^~o@< zpm7PDFs9yxp+byMS(JWm$NeL?DNrMCNE!I^ko-*csB+dsf4GAq{=6sfyf4wb>?v1v zmb`F*bN1KUx-`ra1+TJ37bXNP%`-Fd`vVQFTwWpX@;s(%nDQa#oWhgk#mYlY*!d>( zE&!|ySF!mIyfING+#%RDY3IBH_fW$}6~1%!G`suHub1kP@&DoAd5~7J55;5_noPI6eLf{t;@9Kf<{aO0`1WNKd?<)C-|?C?)3s z>wEq@8=I$Wc~Mt$o;g++5qR+(6wt9GI~pyrDJ%c?gPZe)owvy^J2S=+M^ z&WhIE`g;;J^xQLVeCtf7b%Dg#Z2gq9hp_%g)-%_`y*zb; zn9`f`mUPN-Ts&fFo(aNTsXPA|J!TJ{0hZp0^;MYHLOcD=r_~~^ymS8KLCSeU3;^QzJNqS z5{5rEAv#l(X?bvwxpU;2%pQftF`YFgrD1jt2^~Mt^~G>T*}A$yZc@(k9orlCGv&|1 zWWvVgiJsCAtamuAYT~nzs?TQFt<1LSEx!@e0~@yd6$b5!Zm(FpBl;(Cn>2vF?k zOm#TTjFwd2D-CyA!mqR^?#Uwm{NBemP>(pHmM}9;;8`c&+_o3#E5m)JzfwN?(f-a4 zyd%xZc^oQx3XT?vcCqCX&Qrk~nu;fxs@JUoyVoi5fqpi&bUhQ2y!Ok2pzsFR(M(|U zw3E+kH_zmTRQ9dUMZWRE%Zakiwc+lgv7Z%|YO9YxAy`y28`Aw;WU6HXBgU7fl@dnt z-fFBV)}H-gqP!1;V@Je$WcbYre|dRdp{xt!7sL3Eoa%IA`5CAA%;Wq8PktwPdULo! 
z8!sB}Qt8#jH9Sh}QiUtEPZ6H0b*7qEKGJ%ITZ|vH)5Q^2m<7o3#Z>AKc%z7_u`rXA zqrCy{-{8;9>dfllLu$^M5L z-hXs))h*qz%~ActwkIA(qOVBZl2v4lwbM>9l70Y`+T*elINFqt#>OaVWoja8RMsep z6Or3f=oBnA3vDbn*+HNZP?8LsH2MY)x%c13@(XfuGR}R?Nu<|07{$+Lc3$Uv^I!MQ z>6qWgd-=aG2Y^24g4{Bw9ueOR)(9h`scImD=86dD+MnSN4$6 z^U*o_mE-6Rk~Dp!ANp#5RE9n*LG(Vg`1)g6!(XtDzsov$Dvz|Gv1WU68J$CkshQhS zCrc|cdkW~UK}5NeaWj^F4MSgFM+@fJd{|LLM)}_O<{rj z+?*Lm?owq?IzC%U%9EBga~h-cJbIu=#C}XuWN>OLrc%M@Gu~kFEYUi4EC6l#PR2JS zQUkGKrrS#6H7}2l0F@S11DP`@pih0WRkRJl#F;u{c&ZC{^$Z+_*lB)r)-bPgRFE;* zl)@hK4`tEP=P=il02x7-C7p%l=B`vkYjw?YhdJU9!P!jcmY$OtC^12w?vy3<<=tlY zUwHJ_0lgWN9vf>1%WACBD{UT)1qHQSE2%z|JHvP{#INr13jM}oYv_5#xsnv9`)UAO zuwgyV4YZ;O)eSc3(mka6=aRohi!HH@I#xq7kng?Acdg7S4vDJb6cI5fw?2z%3yR+| zU5v@Hm}vy;${cBp&@D=HQ9j7NcFaOYL zj-wV=eYF{|XTkFNM2uz&T8uH~;)^Zo!=KP)EVyH6s9l1~4m}N%XzPpduPg|h-&lL` zAXspR0YMOKd2yO)eMFFJ4?sQ&!`dF&!|niH*!^*Ml##o0M(0*uK9&yzekFi$+mP9s z>W9d%Jb)PtVi&-Ha!o~Iyh@KRuKpQ@)I~L*d`{O8!kRObjO7=n+Gp36fe!66neh+7 zW*l^0tTKjLLzr`x4`_8&on?mjW-PzheTNox8Hg7Nt@*SbE-%kP2hWYmHu#Fn@Q^J(SsPUz*|EgOoZ6byg3ew88UGdZ>9B2Tq=jF72ZaR=4u%1A6Vm{O#?@dD!(#tmR;eP(Fu z{$0O%=Vmua7=Gjr8nY%>ul?w=FJ76O2js&17W_iq2*tb!i{pt#`qZB#im9Rl>?t?0c zicIC}et_4d+CpVPx)i4~$u6N-QX3H77ez z?ZdvXifFk|*F8~L(W$OWM~r`pSk5}#F?j_5u$Obu9lDWIknO^AGu+Blk7!9Sb;NjS zncZA?qtASdNtzQ>z7N871IsPAk^CC?iIL}+{K|F@BuG2>qQ;_RUYV#>hHO(HUPpk@ z(bn~4|F_jiZi}Sad;_7`#4}EmD<1EiIxa48QjUuR?rC}^HRocq`OQPM@aHVKP9E#q zy%6bmHygCpIddPjE}q_DPC`VH_2m;Eey&ZH)E6xGeStOK7H)#+9y!%-Hm|QF6w#A( zIC0Yw%9j$s-#odxG~C*^MZ?M<+&WJ+@?B_QPUyTg9DJGtQN#NIC&-XddRsf3n^AL6 zT@P|H;PvN;ZpL0iv$bRb7|J{0o!Hq+S>_NrH4@coZtBJu#g8#CbR7|#?6uxi8d+$g z87apN>EciJZ`%Zv2**_uiET9Vk{pny&My;+WfGDw4EVL#B!Wiw&M|A8f1A@ z(yFQS6jfbH{b8Z-S7D2?Ixl`j0{+ZnpT=;KzVMLW{B$`N?Gw^Fl0H6lT61%T2AU**!sX0u?|I(yoy&Xveg7XBL&+>n6jd1##6d>TxE*Vj=8lWiG$4=u{1UbAa5QD>5_ z;Te^42v7K6Mmu4IWT6Rnm>oxrl~b<~^e3vbj-GCdHLIB_>59}Ya+~OF68NiH=?}2o zP(X7EN=quQn&)fK>M&kqF|<_*H`}c zk=+x)GU>{Af#vx&s?`UKUsz})g^Pc&?Ka@t5$n$bqf6{r1>#mWx6Ep>9|A}VmWRnowVo`OyCr^fHsf# zQjQ3Ttp7y#iQY8l`zEUW)(@gGQdt(~rkxlkefskT(t%@i8=|p1Y9Dc5bc+z#n$s13 zGJk|V0+&Ekh(F};PJzQKKo+FG@KV8a<$gmNSD;7rd_nRdc%?9)p!|B-@P~kxQG}~B zi|{0}@}zKC(rlFUYp*dO1RuvPC^DQOkX4<+EwvBAC{IZQdYxoq1Za!MW7%p7gGr=j zzWnAq%)^O2$eItftC#TTSArUyL$U54-O7e|)4_7%Q^2tZ^0-d&3J1}qCzR4dWX!)4 zzIEKjgnYgMus^>6uw4Jm8ga6>GBtMjpNRJ6CP~W=37~||gMo_p@GA@#-3)+cVYnU> zE5=Y4kzl+EbEh%dhQokB{gqNDqx%5*qBusWV%!iprn$S!;oN_6E3?0+umADVs4ako z?P+t?m?};gev9JXQ#Q&KBpzkHPde_CGu-y z<{}RRAx=xlv#mVi+Ibrgx~ujW$h{?zPfhz)Kp7kmYS&_|97b&H&1;J-mzrBWAvY} zh8-I8hl_RK2+nnf&}!W0P+>5?#?7>npshe<1~&l_xqKd0_>dl_^RMRq@-Myz&|TKZBj1=Q()) zF{dBjv5)h=&Z)Aevx}+i|7=R9rG^Di!sa)sZCl&ctX4&LScQ-kMncgO(9o6W6)yd< z@Rk!vkja*X_N3H=BavGoR0@u0<}m-7|2v!0+2h~S2Q&a=lTH91OJsvms2MT~ zY=c@LO5i`mLpBd(vh|)I&^A3TQLtr>w=zoyzTd=^f@TPu&+*2MtqE$Avf>l>}V|3-8Fp2hzo3y<)hr_|NO(&oSD z!vEjTWBxbKTiShVl-U{n*B3#)3a8$`{~Pk}J@elZ=>Pqp|MQ}jrGv7KrNcjW%TN_< zZz8kG{#}XoeWf7qY?D)L)8?Q-b@Na&>i=)(@uNo zr;cH98T3$Iau8Hn*@vXi{A@YehxDE2zX~o+RY`)6-X{8~hMpc#C`|8y> zU8Mnv5A0dNCf{Ims*|l-^ z(MRp{qoGohB34|ggDI*p!Aw|MFyJ|v+<+E3brfrI)|+l3W~CQLPbnF@G0)P~Ly!1TJLp}xh8uW`Q+RB-v`MRYZ9Gam3cM%{ zb4Cb*f)0deR~wtNb*8w-LlIF>kc7DAv>T0D(a3@l`k4TFnrO+g9XH7;nYOHxjc4lq zMmaW6qpgAgy)MckYMhl?>sq;-1E)-1llUneeA!ya9KM$)DaNGu57Z5aE>=VST$#vb zFo=uRHr$0M{-ha>h(D_boS4zId;3B|Tpqo|?B?Z@I?G(?&Iei+-{9L_A9=h=Qfn-U z1wIUnQe9!z%_j$F_{rf&`ZFSott09gY~qrf@g3O=Y>vzAnXCyL!@(BqWa)Zqt!#_k zfZHuwS52|&&)aK;CHq9V-t9qt0au{$#6c*R#e5n3rje0hic7c7m{kW$p(_`wB=Gw7 z4k`1Hi;Mc@yA7dp@r~?@rfw)TkjAW++|pkfOG}0N|2guek}j8Zen(!+@7?qt_7ndX zB=BG6WJ31#F3#Vk3=aQr8T)3`{=p9nBHlKzE0I@v`{vJ}h8pd6vby&VgFhzH|q;=aonunAXL6G2y(X^CtAhWr*jI 
zGjpY@raZDQkg*aMq}Ni6cRF z{oWv}5`nhSAv>usX}m^GHt`f(t8@zHc?K|y5Zi=4G*UG1Sza{$Dpj%X8 zzEXaKT5N6F5j4J|w#qlZP!zS7BT)9b+!ZSJdToqJts1c!)fwih4d31vfb{}W)EgcA zH2pZ^8_k$9+WD2n`6q5XbOy8>3pcYH9 z07eUB+p}YD@AH!}p!iKv><2QF-Y^&xx^PAc1F13A{nUeCDg&{hnix#FiO!fe(^&%Qcux!h znu*S!s$&nnkeotYsDthh1dq(iQrE|#f_=xVgfiiL&-5eAcC-> z5L0l|DVEM$#ulf{bj+Y~7iD)j<~O8CYM8GW)dQGq)!mck)FqoL^X zwNdZb3->hFrbHFm?hLvut-*uK?zXn3q1z|UX{RZ;-WiLoOjnle!xs+W0-8D)kjU#R z+S|A^HkRg$Ij%N4v~k`jyHffKaC~=wg=9)V5h=|kLQ@;^W!o2^K+xG&2n`XCd>OY5Ydi= zgHH=lgy++erK8&+YeTl7VNyVm9-GfONlSlVb3)V9NW5tT!cJ8d7X)!b-$fb!s76{t z@d=Vg-5K_sqHA@Zx-L_}wVnc@L@GL9_K~Zl(h5@AR#FAiKad8~KeWCo@mgXIQ#~u{ zgYFwNz}2b6Vu@CP0XoqJ+dm8px(5W5-Jpis97F`+KM)TuP*X8H@zwiVKDKGVp59pI zifNHZr|B+PG|7|Y<*tqap0CvG7tbR1R>jn70t1X`XJixiMVcHf%Ez*=xm1(CrTSDt z0cle!+{8*Ja&EOZ4@$qhBuKQ$U95Q%rc7tg$VRhk?3=pE&n+T3upZg^ZJc9~c2es% zh7>+|mrmA-p&v}|OtxqmHIBgUxL~^0+cpfkSK2mhh+4b=^F1Xgd2)}U*Yp+H?ls#z zrLxWg_hm}AfK2XYWr!rzW4g;+^^&bW%LmbtRai9f3PjU${r@n`JThy-cphbcwn)rq9{A$Ht`lmYKxOacy z6v2R(?gHhD5@&kB-Eg?4!hAoD7~(h>(R!s1c1Hx#s9vGPePUR|of32bS`J5U5w{F) z>0<^ktO2UHg<0{oxkdOQ;}coZDQph8p6ruj*_?uqURCMTac;>T#v+l1Tc~%^k-Vd@ zkc5y35jVNc49vZpZx;gG$h{%yslDI%Lqga1&&;mN{Ush1c7p>7e-(zp}6E7f-XmJb4nhk zb8zS+{IVbL$QVF8pf8}~kQ|dHJAEATmmnrb_wLG}-yHe>W|A&Y|;muy-d^t^<&)g5SJfaTH@P1%euONny=mxo+C z4N&w#biWY41r8k~468tvuYVh&XN&d#%QtIf9;iVXfWY)#j=l`&B~lqDT@28+Y!0E+MkfC}}H*#(WKKdJJq=O$vNYCb(ZG@p{fJgu;h z21oHQ(14?LeT>n5)s;uD@5&ohU!@wX8w*lB6i@GEH0pM>YTG+RAIWZD;4#F1&F%Jp zXZUml2sH0!lYJT?&sA!qwez6cXzJEd(1ZC~kT5kZSp7(@=H2$Azb_*W&6aA|9iwCL zdX7Q=42;@dspHDwYE?miGX#L^3xD&%BI&fN9^;`v4OjQXPBaBmOF1;#C)8XA(WFlH zycro;DS2?(G&6wkr6rqC>rqDv3nfGw3hmN_9Al>TgvmGsL8_hXx09};l9Ow@)F5@y z#VH5WigLDwZE4nh^7&@g{1FV^UZ%_LJ-s<{HN*2R$OPg@R~Z`c-ET*2}XB@9xvAjrK&hS=f|R8Gr9 zr|0TGOsI7RD+4+2{ZiwdVD@2zmg~g@^D--YL;6UYGSM8i$NbQr4!c7T9rg!8;TM0E zT#@?&S=t>GQm)*ua|?TLT2ktj#`|R<_*FAkOu2Pz$wEc%-=Y9V*$&dg+wIei3b*O8 z2|m$!jJG!J!ZGbbIa!(Af~oSyZV+~M1qGvelMzPNE_%5?c2>;MeeG2^N?JDKjFYCy z7SbPWH-$cWF9~fX%9~v99L!G(wi!PFp>rB!9xj7=Cv|F+7CsGNwY0Q_J%FID%C^CBZQfJ9K(HK%k31j~e#&?hQ zNuD6gRkVckU)v+53-fc} z7ZCzYN-5RG4H7;>>Hg?LU9&5_aua?A0)0dpew1#MMlu)LHe(M;OHjHIUl7|%%)YPo z0cBk;AOY00%Fe6heoN*$(b<)Cd#^8Iu;-2v@>cE-OB$icUF9EEoaC&q8z9}jMTT2I z8`9;jT%z0;dy4!8U;GW{i`)3!c6&oWY`J3669C!tM<5nQFFrFRglU8f)5Op$GtR-3 zn!+SPCw|04sv?%YZ(a7#L?vsdr7ss@WKAw&A*}-1S|9~cL%uA+E~>N6QklFE>8W|% zyX-qAUGTY1hQ-+um`2|&ji0cY*(qN!zp{YpDO-r>jPk*yuVSay<)cUt`t@&FPF_&$ zcHwu1(SQ`I-l8~vYyUxm@D1UEdFJ$f5Sw^HPH7b!9 zzYT3gKMF((N(v0#4f_jPfVZ=ApN^jQJe-X$`A?X+vWjLn_%31KXE*}5_}d8 zw_B1+a#6T1?>M{ronLbHIlEsMf93muJ7AH5h%;i99<~JX^;EAgEB1uHralD*!aJ@F zV2ruuFe9i2Q1C?^^kmVy921eb=tLDD43@-AgL^rQ3IO9%+vi_&R2^dpr}x{bCVPej z7G0-0o64uyWNtr*loIvslyo0%)KSDDKjfThe0hcqs)(C-MH1>bNGBDRTW~scy_{w} zp^aq8Qb!h9Lwielq%C1b8=?Z=&U)ST&PHbS)8Xzjh2DF?d{iAv)Eh)wsUnf>UtXN( zL7=$%YrZ#|^c{MYmhn!zV#t*(jdmYdCpwqpZ{v&L8KIuKn`@IIZfp!uo}c;7J57N` zAxyZ-uA4=Gzl~Ovycz%MW9ZL7N+nRo&1cfNn9(1H5eM;V_4Z_qVann7F>5f>%{rf= zPBZFaV@_Sobl?Fy&KXyzFDV*FIdhS5`Uc~S^Gjo)aiTHgn#<0C=9o-a-}@}xDor;D zZyZ|fvf;+=3MZd>SR1F^F`RJEZo+|MdyJYQAEauKu%WDol~ayrGU3zzbHKsnHKZ*z zFiwUkL@DZ>!*x05ql&EBq@_Vqv83&?@~q5?lVmffQZ+V-=qL+!u4Xs2Z2zdCQ3U7B&QR9_Iggy} z(om{Y9eU;IPe`+p1ifLx-XWh?wI)xU9ik+m#g&pGdB5Bi<`PR*?92lE0+TkRuXI)z z5LP!N2+tTc%cB6B1F-!fj#}>S!vnpgVU~3!*U1ej^)vjUH4s-bd^%B=ItQqDCGbrEzNQi(dJ`J}-U=2{7-d zK8k^Rlq2N#0G?9&1?HSle2vlkj^KWSBYTwx`2?9TU_DX#J+f+qLiZCqY1TXHFxXZqYMuD@RU$TgcnCC{_(vwZ-*uX)~go#%PK z@}2Km_5aQ~(<3cXeJN6|F8X_1@L%@xTzs}$_*E|a^_URF_qcF;Pfhoe?FTFwvjm1o z8onf@OY@jC2tVcMaZS;|T!Ks(wOgPpRzRnFS-^RZ4E!9dsnj9sFt609a|jJbb1Dt@ z<=Gal2jDEupxUSwWu6zp<<&RnAA;d&4gKVG0iu6g(DsST(4)z6R)zDpfaQ}v{5ARt 
[GIT binary patch payload omitted: base85-encoded binary data, not human-readable.]
z(2!VjP&{Z1w+%eUq^ /dev/null && pwd -P ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD=maximum @@ -133,22 +131,29 @@ location of your Java installation." fi else JAVACMD=java - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the location of your Java installation." + fi fi # Increase the maximum file descriptors if we can. if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then case $MAX_FD in #( max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 MAX_FD=$( ulimit -H -n ) || warn "Could not query maximum file descriptor limit" esac case $MAX_FD in #( '' | soft) :;; #( *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 ulimit -n "$MAX_FD" || warn "Could not set maximum file descriptor limit to $MAX_FD" esac @@ -193,11 +198,15 @@ if "$cygwin" || "$msys" ; then done fi -# Collect all arguments for the java command; -# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of -# shell script including quotes and variable substitutions, so put them in -# double quotes to make sure that they get re-expanded; and -# * put everything else in single quotes, so that it's not re-expanded. + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. set -- \ "-Dorg.gradle.appname=$APP_BASE_NAME" \ diff --git a/js/react_native/android/gradlew.bat b/js/react_native/android/gradlew.bat index f127cfd49d402..25da30dbdeee9 100644 --- a/js/react_native/android/gradlew.bat +++ b/js/react_native/android/gradlew.bat @@ -26,6 +26,7 @@ if "%OS%"=="Windows_NT" setlocal set DIRNAME=%~dp0 if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused set APP_BASE_NAME=%~n0 set APP_HOME=%DIRNAME% @@ -42,11 +43,11 @@ set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 if %ERRORLEVEL% equ 0 goto execute -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 goto fail @@ -56,11 +57,11 @@ set JAVA_EXE=%JAVA_HOME%/bin/java.exe if exist "%JAVA_EXE%" goto execute -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 
1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 goto fail From bf4d3e1a5b310ffecd97c4babfc0e35a3045f83a Mon Sep 17 00:00:00 2001 From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:23:01 -0800 Subject: [PATCH 08/13] Update vcpkg.json - lock flatbuffer version (#23046) ### Description Locking version introduced in: https://github.com/microsoft/onnxruntime/blob/03ea5dc495bfb48977d23705fa7cf184866a9f7f/onnxruntime/core/flatbuffers/schema/ort_training_checkpoint.fbs.h#L11-L13 ### Motivation and Context Resolve issue for version `>=1.20.` https://github.com/microsoft/onnxruntime/issues/22666 --- cmake/vcpkg.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cmake/vcpkg.json b/cmake/vcpkg.json index 159b8654c1cb1..fcb2c7d5de89b 100644 --- a/cmake/vcpkg.json +++ b/cmake/vcpkg.json @@ -66,6 +66,12 @@ "platform": "windows" } ], + "overrides": [ + { + "name": "flatbuffers", + "version": "23.5.26" + } + ], "features": { "tests": { "description": "Build ONNXRuntime unit tests", From defcc4f819771d1a43f9c757f2636d8f260b394c Mon Sep 17 00:00:00 2001 From: Jiajia Qin Date: Wed, 11 Dec 2024 06:58:57 +0800 Subject: [PATCH 09/13] [webgpu] Optimize Expand (#23052) ### Description Use components = 4 if possible. This is the webgpu native implementation from #22752 --- .../core/providers/webgpu/tensor/expand.cc | 41 +++++++++++++------ .../test/providers/cpu/tensor/expand_test.cc | 14 +++++++ 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/onnxruntime/core/providers/webgpu/tensor/expand.cc b/onnxruntime/core/providers/webgpu/tensor/expand.cc index 9fc886cb69bbf..809616660aa9e 100644 --- a/onnxruntime/core/providers/webgpu/tensor/expand.cc +++ b/onnxruntime/core/providers/webgpu/tensor/expand.cc @@ -11,14 +11,20 @@ namespace onnxruntime { namespace webgpu { Status ExpandProgram::GenerateShaderCode(ShaderHelper& shader) const { - const auto& input = shader.AddInput("input", ShaderUsage::UseUniform); + const auto& input = shader.AddInput("input", ShaderUsage::UseUniform | ShaderUsage::UseValueTypeAlias); const auto& output = shader.AddOutput("output", ShaderUsage::UseUniform); - - shader.MainFunctionBody() << shader.GuardAgainstOutOfBoundsWorkgroupSizes("uniforms.data_size") - << " let output_indices = " << output.OffsetToIndices("global_idx") << ";\n" - << " let input_offset = " << input.BroadcastedIndicesToOffset("output_indices", output) << ";\n " - << output.SetByOffset("global_idx", input.GetByOffset("input_offset")); - + shader.MainFunctionBody() << shader.GuardAgainstOutOfBoundsWorkgroupSizes("uniforms.data_size"); + if (input.NumComponents() != output.NumComponents()) { + const auto& output_indices = shader.AddIndices("output_indices"); + shader.MainFunctionBody() << " let output_indices = " << output_indices.OffsetToIndices("global_idx * 4") << ";\n" + << " let input_offset = " << input.BroadcastedIndicesToOffset("output_indices", output_indices) << ";\n " + << " let value = vec4(" << input.GetByOffset("input_offset") << ");\n" + << output.SetByOffset("global_idx", "value"); + } else { + shader.MainFunctionBody() << " let output_indices = " << output.OffsetToIndices("global_idx") << ";\n" + << " let input_offset = " << input.BroadcastedIndicesToOffset("output_indices", output) << ";\n " + << output.SetByOffset("global_idx", input.GetByOffset("input_offset")); + } return Status::OK(); } @@ -28,18 +34,27 @@ Status Expand::ComputeInternal(ComputeContext& context) 
const { auto output_dims = input_shape_tensor->DataAsSpan(); TensorShape output_shape{}; - ORT_RETURN_IF_ERROR(ComputeBroadcastOutputShape(Node().Name(), input_tensor->Shape(), output_dims, output_shape)); + TensorShape input_shape = input_tensor->Shape(); + ORT_RETURN_IF_ERROR(ComputeBroadcastOutputShape(Node().Name(), input_shape, output_dims, output_shape)); auto* output_tensor = context.Output(0, output_shape); - uint32_t data_size = gsl::narrow(output_shape.Size()); + const int components_i = input_shape.IsScalar() ? 1 : input_shape[input_shape.NumDimensions() - 1] % 4 == 0 ? 4 + : 1; + const int components_o = output_shape.IsScalar() ? 1 : output_shape[output_shape.NumDimensions() - 1] % 4 == 0 ? 4 + : 1; + uint32_t data_size = gsl::narrow(output_shape.Size() / components_o); + ExpandProgram program{}; program - .AddInputs({{input_tensor, ProgramTensorMetadataDependency::TypeAndRank}}) - .AddOutputs({{output_tensor, ProgramTensorMetadataDependency::Rank}}) + .AddInputs({{input_tensor, ProgramTensorMetadataDependency::TypeAndRank, components_i}}) + .AddOutputs({{output_tensor, ProgramTensorMetadataDependency::TypeAndRank, components_o}}) .SetDispatchGroupSize((data_size + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE) .AddUniformVariables({ {data_size}, }); + if (components_i != components_o) { + program.AddIndices(output_shape); + } return context.RunProgram(program); } @@ -55,8 +70,8 @@ Status Expand::ComputeInternal(ComputeContext& context) const { KernelDefBuilder().TypeConstraint("T", TYPE).InputMemoryType(OrtMemTypeCPU, 1), \ KERNEL_CLASS); -WEBGPU_EXPAND_VERSIONED_KERNEL(Expand, 8, 12, Expand, WebGpuSupportedFloatTypes()) -WEBGPU_EXPAND_KERNEL(Expand, 13, Expand, WebGpuSupportedFloatTypes()) +WEBGPU_EXPAND_VERSIONED_KERNEL(Expand, 8, 12, Expand, WebGpuSupportedNumberTypes()) +WEBGPU_EXPAND_KERNEL(Expand, 13, Expand, WebGpuSupportedNumberTypes()) } // namespace webgpu } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/tensor/expand_test.cc b/onnxruntime/test/providers/cpu/tensor/expand_test.cc index 4b0f4e84ca37d..38e3bc3af6d6b 100644 --- a/onnxruntime/test/providers/cpu/tensor/expand_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/expand_test.cc @@ -167,6 +167,20 @@ TEST(ExpandOpTest, Expand_2x2x1x2x1_float) { test.Run(); } +TEST(ExpandOpTest, Expand_3x1x8_float) { + OpTester test("Expand", 8); + test.AddInput("data_0", {3, 2, 1}, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}); + test.AddInput("data_1", {3}, {3, 1, 8}); + test.AddOutput("result", {3, 2, 8}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, 3.0f, + 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, + 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, + 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f}); + test.Run(); +} + #ifndef USE_TENSORRT TEST(ExpandOpTest, Expand_scalar_float) { OpTester test("Expand", 8); From fa6ad202aa9d591945628100ececa6571279efff Mon Sep 17 00:00:00 2001 From: Edward Chen <18449977+edgchen1@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:44:36 -0800 Subject: [PATCH 10/13] Minor updates to onnxruntime_java.cmake (#23068) - Use `ANDROID` instead of `CMAKE_SYSTEM_NAME STREQUAL "Android"`. - Put common gradle arguments into `COMMON_GRADLE_ARGS` to make them easier to reuse. 
--- cmake/onnxruntime_java.cmake | 49 ++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/cmake/onnxruntime_java.cmake b/cmake/onnxruntime_java.cmake index 765ebab111ac7..b15b9632e9e24 100644 --- a/cmake/onnxruntime_java.cmake +++ b/cmake/onnxruntime_java.cmake @@ -7,7 +7,7 @@ include(FindJava) find_package(Java REQUIRED) include(UseJava) -if (NOT CMAKE_SYSTEM_NAME STREQUAL "Android") +if (NOT ANDROID) find_package(JNI REQUIRED) endif() @@ -21,23 +21,28 @@ endif() set(GRADLE_EXECUTABLE "${JAVA_ROOT}/gradlew") +set(COMMON_GRADLE_ARGS --console=plain) +if(WIN32) + list(APPEND COMMON_GRADLE_ARGS -Dorg.gradle.daemon=false) +elseif (ANDROID) + # For Android build, we may run gradle multiple times in same build, + # sometimes gradle JVM will run out of memory if we keep the daemon running + # it is better to not keep a daemon running + list(APPEND COMMON_GRADLE_ARGS --no-daemon) +endif() + # Specify the Java source files file(GLOB_RECURSE onnxruntime4j_gradle_files "${JAVA_ROOT}/*.gradle") file(GLOB_RECURSE onnxruntime4j_src "${JAVA_ROOT}/src/main/java/ai/onnxruntime/*.java") set(JAVA_OUTPUT_JAR ${JAVA_ROOT}/build/libs/onnxruntime.jar) # this jar is solely used to signaling mechanism for dependency management in CMake # if any of the Java sources change, the jar (and generated headers) will be regenerated and the onnxruntime4j_jni target will be rebuilt -set(GRADLE_ARGS --console=plain clean jar -x test) -if(WIN32) - set(GRADLE_ARGS ${GRADLE_ARGS} -Dorg.gradle.daemon=false) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Android") - # For Android build, we may run gradle multiple times in same build, - # sometimes gradle JVM will run out of memory if we keep the daemon running - # it is better to not keep a daemon running - set(GRADLE_ARGS ${GRADLE_ARGS} --no-daemon) -endif() +set(GRADLE_ARGS clean jar -x test) -add_custom_command(OUTPUT ${JAVA_OUTPUT_JAR} COMMAND ${GRADLE_EXECUTABLE} ${GRADLE_ARGS} WORKING_DIRECTORY ${JAVA_ROOT} DEPENDS ${onnxruntime4j_gradle_files} ${onnxruntime4j_src}) +add_custom_command(OUTPUT ${JAVA_OUTPUT_JAR} + COMMAND ${GRADLE_EXECUTABLE} ${COMMON_GRADLE_ARGS} ${GRADLE_ARGS} + WORKING_DIRECTORY ${JAVA_ROOT} + DEPENDS ${onnxruntime4j_gradle_files} ${onnxruntime4j_src}) add_custom_target(onnxruntime4j DEPENDS ${JAVA_OUTPUT_JAR}) set_source_files_properties(${JAVA_OUTPUT_JAR} PROPERTIES GENERATED TRUE) set_property(TARGET onnxruntime4j APPEND PROPERTY ADDITIONAL_CLEAN_FILES "${JAVA_OUTPUT_DIR}") @@ -62,7 +67,7 @@ target_link_libraries(onnxruntime4j_jni PUBLIC onnxruntime) set(JAVA_PACKAGE_OUTPUT_DIR ${JAVA_OUTPUT_DIR}/build) file(MAKE_DIRECTORY ${JAVA_PACKAGE_OUTPUT_DIR}) -if (CMAKE_SYSTEM_NAME STREQUAL "Android") +if (ANDROID) set(ANDROID_PACKAGE_OUTPUT_DIR ${JAVA_PACKAGE_OUTPUT_DIR}/android) file(MAKE_DIRECTORY ${ANDROID_PACKAGE_OUTPUT_DIR}) endif() @@ -88,7 +93,7 @@ if(APPLE) elseif(JNI_ARCH STREQUAL "arm64") set(JNI_ARCH aarch64) endif() -elseif (CMAKE_SYSTEM_NAME STREQUAL "Android") +elseif (ANDROID) set(JNI_ARCH ${ANDROID_ABI}) elseif (ARM64) set(JNI_ARCH aarch64) @@ -180,15 +185,7 @@ else() endif() # run the build process (this copies the results back into CMAKE_CURRENT_BINARY_DIR) -set(GRADLE_ARGS --console=plain cmakeBuild -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR}) -if(WIN32) - set(GRADLE_ARGS ${GRADLE_ARGS} -Dorg.gradle.daemon=false) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Android") - # For Android build, we may run gradle multiple times in same build, - # sometimes gradle JVM will run out of memory if we keep the daemon 
running - # it is better to not keep a daemon running - set(GRADLE_ARGS ${GRADLE_ARGS} --no-daemon) -endif() +set(GRADLE_ARGS cmakeBuild -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR}) # Append relevant native build flags to gradle command set(GRADLE_ARGS ${GRADLE_ARGS} ${ORT_PROVIDER_FLAGS}) @@ -197,9 +194,11 @@ if (onnxruntime_ENABLE_TRAINING_APIS) endif() message(STATUS "GRADLE_ARGS: ${GRADLE_ARGS}") -add_custom_command(TARGET onnxruntime4j_jni POST_BUILD COMMAND ${GRADLE_EXECUTABLE} ${GRADLE_ARGS} WORKING_DIRECTORY ${JAVA_ROOT}) +add_custom_command(TARGET onnxruntime4j_jni POST_BUILD + COMMAND ${GRADLE_EXECUTABLE} ${COMMON_GRADLE_ARGS} ${GRADLE_ARGS} + WORKING_DIRECTORY ${JAVA_ROOT}) -if (CMAKE_SYSTEM_NAME STREQUAL "Android") +if (ANDROID) set(ANDROID_PACKAGE_JNILIBS_DIR ${JAVA_OUTPUT_DIR}/android) set(ANDROID_PACKAGE_ABI_DIR ${ANDROID_PACKAGE_JNILIBS_DIR}/${ANDROID_ABI}) file(MAKE_DIRECTORY ${ANDROID_PACKAGE_JNILIBS_DIR}) @@ -214,6 +213,7 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Android") POST_BUILD COMMAND ${CMAKE_COMMAND} -E echo "Generating Android AAR package..." COMMAND ${GRADLE_EXECUTABLE} + ${COMMON_GRADLE_ARGS} build -b build-android.gradle -c settings-android.gradle -DjniLibsDir=${ANDROID_PACKAGE_JNILIBS_DIR} -DbuildDir=${ANDROID_PACKAGE_OUTPUT_DIR} @@ -237,6 +237,7 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Android") POST_BUILD COMMAND ${CMAKE_COMMAND} -E echo "Building and running Android test for Android AAR package..." COMMAND ${GRADLE_EXECUTABLE} + ${COMMON_GRADLE_ARGS} clean assembleDebug assembleDebugAndroidTest -DminSdkVer=${ANDROID_MIN_SDK} --stacktrace From 02f0af0d0801d6e613e16a86b4914a041a11e42f Mon Sep 17 00:00:00 2001 From: shiyi Date: Wed, 11 Dec 2024 07:48:16 +0800 Subject: [PATCH 11/13] [WebNN] Improve data type check of slice op (#22988) A follow-up of [[WebNN] Support negative steps for slice](https://github.com/microsoft/onnxruntime/pull/22871#discussion_r1847929774). Slice op is emulated by reverse+slice when steps < 0 so `SliceOpBuilder::HasSupportedInputsImpl()` should also check the supported data types of reverse. 
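
The `SliceOpBuilder::HasSupportedInputsImpl` override added at the end of this patch is truncated below, so purely as an illustrative sketch (not the PR's verbatim code) the negative-step gating it describes could look roughly like the following. Here `GetStepsFromInitializer` is a hypothetical stand-in for however the `steps` initializer is actually unpacked, and the `"input"`/`"data"` limit names are assumptions modeled on the helper usage shown elsewhere in this diff:

```cpp
// Sketch only: if any step is negative, Slice is emulated with reverse + slice,
// so the input data type must also satisfy WebNN's "reverse" limits.
bool SliceOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& initializers, const Node& node,
                                            const emscripten::val& wnn_limits,
                                            const logging::Logger& logger) const {
  const auto& input_defs = node.InputDefs();
  const auto& op_type = node.OpType();
  int32_t input_type;
  if (!GetType(*input_defs[0], input_type, logger))
    return false;

  bool has_negative_step = false;
  // Opset >= 10 Slice: input 4 is the optional 'steps' tensor, expected to be an initializer.
  if (input_defs.size() > 4 && input_defs[4]->Exists()) {
    auto it = initializers.find(input_defs[4]->Name());
    if (it != initializers.end()) {
      for (const int64_t step : GetStepsFromInitializer(*it->second)) {  // hypothetical unpacking helper
        if (step < 0) {
          has_negative_step = true;
          break;
        }
      }
    }
  }

  if (has_negative_step &&
      !IsDataTypeSupportedByWebNNOp(op_type, "reverse", input_type, wnn_limits, "input", "data", logger)) {
    return false;
  }
  // The data type must be supported by WebNN's slice in every case.
  return IsDataTypeSupportedByWebNNOp(op_type, "slice", input_type, wnn_limits, "input", "data", logger);
}
```

Routing both checks through the `IsDataTypeSupportedByWebNNOp` helper introduced in `helper.cc` keeps the ONNX-to-WebNN op-name mapping out of individual builders, which is the same pattern the Einsum builder switches to in this patch.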
--------- Co-authored-by: Wanming Lin --- .../core/providers/webnn/builders/helper.cc | 27 +++++++++++++++---- .../core/providers/webnn/builders/helper.h | 7 +++++ .../webnn/builders/impl/base_op_builder.cc | 8 +++--- .../webnn/builders/impl/base_op_builder.h | 4 +-- .../webnn/builders/impl/binary_op_builder.cc | 8 +++--- .../webnn/builders/impl/cast_op_builder.cc | 8 +++--- .../webnn/builders/impl/concat_op_builder.cc | 8 +++--- .../webnn/builders/impl/conv_op_builder.cc | 8 +++--- .../webnn/builders/impl/einsum_op_builder.cc | 14 +++++----- .../impl/gatherElements_op_builder.cc | 7 ++--- .../builders/impl/gatherND_op_builder.cc | 8 +++--- .../webnn/builders/impl/gather_op_builder.cc | 8 +++--- .../webnn/builders/impl/gemm_op_builder.cc | 8 +++--- .../webnn/builders/impl/gru_op_builder.cc | 8 +++--- .../webnn/builders/impl/logical_op_builder.cc | 8 +++--- .../webnn/builders/impl/lstm_op_builder.cc | 8 +++--- .../webnn/builders/impl/max_min_op_builder.cc | 8 +++--- .../builders/impl/normalization_op_builder.cc | 7 ++--- .../webnn/builders/impl/qdq_op_builder.cc | 8 +++--- .../impl/scatterElements_op_builder.cc | 7 ++--- .../builders/impl/scatterND_op_builder.cc | 7 ++--- .../webnn/builders/impl/slice_op_builder.cc | 26 ++++++++++++++++++ .../webnn/builders/impl/ternary_op_builder.cc | 8 +++--- 23 files changed, 136 insertions(+), 82 deletions(-) diff --git a/onnxruntime/core/providers/webnn/builders/helper.cc b/onnxruntime/core/providers/webnn/builders/helper.cc index f36f8283e9bf6..45a87960126cd 100644 --- a/onnxruntime/core/providers/webnn/builders/helper.cc +++ b/onnxruntime/core/providers/webnn/builders/helper.cc @@ -178,14 +178,31 @@ bool IsDataTypeSupportedByOp(const std::string& onnx_op_type, if (!GetWebNNOpType(onnx_op_type, webnn_op_type)) return false; + return IsDataTypeSupportedByWebNNOp(onnx_op_type, webnn_op_type, onnx_data_type, wnn_limits, + webnn_input_output_name, onnx_input_output_name, logger); +} + +bool IsDataTypeSupportedByWebNNOp(const std::string& onnx_op_type, + const std::string& webnn_op_type, + const int32_t onnx_data_type, + const emscripten::val& wnn_limits, + const std::string& webnn_input_output_name, + const std::string& onnx_input_output_name, + const logging::Logger& logger) { + if (wnn_limits[webnn_op_type].isUndefined()) { + LOGS(logger, VERBOSE) << "[" << onnx_op_type << "] WebNN op [" << webnn_op_type << "] is not supported for now"; + return false; + } + if (wnn_limits[webnn_op_type][webnn_input_output_name].isUndefined()) { + LOGS(logger, VERBOSE) << "[" << onnx_op_type << "] WebNN op [" << webnn_op_type << "] doesn't have parameter [" + << webnn_input_output_name << "]"; + return false; + } if (!IsSupportedDataType(onnx_data_type, wnn_limits[webnn_op_type][webnn_input_output_name]["dataTypes"])) { - LOGS(logger, VERBOSE) << "[" << onnx_op_type - << "] " << onnx_input_output_name - << " type: [" << onnx_data_type - << "] is not supported for now"; + LOGS(logger, VERBOSE) << "[" << onnx_op_type << "] " << onnx_input_output_name << "'s data type: [" + << onnx_data_type << "] is not supported by WebNN op [" << webnn_op_type << "] for now"; return false; } - return true; } diff --git a/onnxruntime/core/providers/webnn/builders/helper.h b/onnxruntime/core/providers/webnn/builders/helper.h index 7fdfc5aefa798..a06f46f1bdf0a 100644 --- a/onnxruntime/core/providers/webnn/builders/helper.h +++ b/onnxruntime/core/providers/webnn/builders/helper.h @@ -340,6 +340,13 @@ bool IsDataTypeSupportedByOp(const std::string& onnx_op_type, const std::string& 
webnn_input_output_name, const std::string& onnx_input_output_name, const logging::Logger& logger); +bool IsDataTypeSupportedByWebNNOp(const std::string& onnx_op_type, + const std::string& webnn_op_type, + const int32_t onnx_data_type, + const emscripten::val& wnn_limits, + const std::string& webnn_input_output_name, + const std::string& onnx_input_output_name, + const logging::Logger& logger); bool GetBidirectionalBroadcastShape(std::vector& shape_a, std::vector& shape_b, diff --git a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.cc index 70fa0f9516c5c..290d16a48dd83 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.cc @@ -29,7 +29,7 @@ Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const Node& bool BaseOpBuilder::IsOpSupported(const InitializedTensorSet& initializers, const Node& node, const WebnnDeviceType device_type, const emscripten::val& wnn_limits, const logging::Logger& logger) const { - if (!HasSupportedInputs(node, wnn_limits, logger)) + if (!HasSupportedInputs(initializers, node, wnn_limits, logger)) return false; if (!HasSupportedOutputs(node, wnn_limits, logger)) @@ -41,7 +41,7 @@ bool BaseOpBuilder::IsOpSupported(const InitializedTensorSet& initializers, cons return IsOpSupportedImpl(initializers, node, device_type, logger); } -bool BaseOpBuilder::HasSupportedInputs(const Node& node, const emscripten::val& wnn_limits, +bool BaseOpBuilder::HasSupportedInputs(const InitializedTensorSet& initializers, const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto node_name = MakeString("Node [", node.Name(), "] type [", node.OpType(), "]"); for (const auto* input : node.InputDefs()) { @@ -50,10 +50,10 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const emscripten::val& } } - return HasSupportedInputsImpl(node, wnn_limits, logger); + return HasSupportedInputsImpl(initializers, node, wnn_limits, logger); } -bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, +bool BaseOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& initializers, const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const { // We only check the type of input 0 by default, specific op builder can override this. 
diff --git a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h index 9412fa8026fb3..0a4367a71add4 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h +++ b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h @@ -40,7 +40,7 @@ class BaseOpBuilder : public IOpBuilder { return true; } - virtual bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, + virtual bool HasSupportedInputsImpl(const InitializedTensorSet& initializers, const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const; virtual bool HasSupportedOutputsImpl(const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const; @@ -56,7 +56,7 @@ class BaseOpBuilder : public IOpBuilder { private: bool HasSupportedOpSet(const Node& node, const logging::Logger& logger) const; - bool HasSupportedInputs(const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const; + bool HasSupportedInputs(const InitializedTensorSet& initializers, const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const; bool HasSupportedOutputs(const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const; const bool allow_empty_tensor_as_input_; // Some operators can handle ignoring an empty tensor as input. diff --git a/onnxruntime/core/providers/webnn/builders/impl/binary_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/binary_op_builder.cc index af82a01b14de5..e14507e8f5aea 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/binary_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/binary_op_builder.cc @@ -22,8 +22,8 @@ class BinaryOpBuilder : public BaseOpBuilder { // Operator support related. bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const WebnnDeviceType device_type, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -86,8 +86,8 @@ bool BinaryOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers return true; } -bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool BinaryOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input0_type; diff --git a/onnxruntime/core/providers/webnn/builders/impl/cast_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/cast_op_builder.cc index 70ebe18c85b86..4b2f04bed0eb1 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/cast_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/cast_op_builder.cc @@ -21,8 +21,8 @@ class CastOpBuilder : public BaseOpBuilder { // Operator support related. 
private: - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -86,8 +86,8 @@ Status CastOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, } // Operator support related. -bool CastOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool CastOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input_type; diff --git a/onnxruntime/core/providers/webnn/builders/impl/concat_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/concat_op_builder.cc index 1a0d93ae7eada..bac528300e077 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/concat_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/concat_op_builder.cc @@ -21,8 +21,8 @@ class ConcatOpBuilder : public BaseOpBuilder { const logging::Logger& logger) const override ORT_MUST_USE_RESULT; // Operator support related. - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -55,8 +55,8 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, return Status::OK(); } -bool ConcatOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool ConcatOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input0_type; diff --git a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc index 52fcc39ae5418..81e688ea4f8ea 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc @@ -29,8 +29,8 @@ class ConvOpBuilder : public BaseOpBuilder { private: bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const WebnnDeviceType device_type, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { @@ -397,8 +397,8 @@ bool ConvOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, return true; } -bool ConvOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool 
ConvOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input0_type; // input data type diff --git a/onnxruntime/core/providers/webnn/builders/impl/einsum_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/einsum_op_builder.cc index 931854d0f33c1..ef713f48b8135 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/einsum_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/einsum_op_builder.cc @@ -27,8 +27,8 @@ class EinsumOpBuilder : public BaseOpBuilder { // Operator support related. bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node, const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Helper functions, thanks for DML EP's OperatorHelper. @@ -735,8 +735,8 @@ bool EinsumOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initializ return true; } -bool EinsumOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool EinsumOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); @@ -776,11 +776,11 @@ bool EinsumOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten: return false; } else if (recognized_operator_type == RecognizedOperatorType::Pairwise) { // Map to WebNN's gemm or matmul - return IsDataTypeSupportedByOp("MatMul", input0_type, wnn_limits, "a", "inputs", logger); + return IsDataTypeSupportedByWebNNOp(op_type, "matmul", input0_type, wnn_limits, "a", "inputs", logger); } else if (recognized_operator_type == RecognizedOperatorType::ReduceSum) { - return IsDataTypeSupportedByOp("ReduceSum", input0_type, wnn_limits, "input", "inputs", logger); + return IsDataTypeSupportedByWebNNOp(op_type, "reduceSum", input0_type, wnn_limits, "input", "inputs", logger); } else { - return IsDataTypeSupportedByOp("Identity", input0_type, wnn_limits, "input", "inputs", logger); + return IsDataTypeSupportedByWebNNOp(op_type, "identity", input0_type, wnn_limits, "input", "inputs", logger); } } diff --git a/onnxruntime/core/providers/webnn/builders/impl/gatherElements_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/gatherElements_op_builder.cc index 225cfcdfc852c..cb7b7de74e121 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/gatherElements_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/gatherElements_op_builder.cc @@ -20,8 +20,8 @@ class GatherElementsOpBuilder : public BaseOpBuilder { const logging::Logger& logger) const override ORT_MUST_USE_RESULT; // Operator support related. 
- bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -49,7 +49,8 @@ Status GatherElementsOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builde // Operator support related. -bool GatherElementsOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, +bool GatherElementsOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& data = *node.InputDefs()[0]; const auto& indices = *node.InputDefs()[1]; diff --git a/onnxruntime/core/providers/webnn/builders/impl/gatherND_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/gatherND_op_builder.cc index cb4f85a40ee12..002a1a6a63026 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/gatherND_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/gatherND_op_builder.cc @@ -22,8 +22,8 @@ class GatherNDOpBuilder : public BaseOpBuilder { // Operator support related. bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node, const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -55,8 +55,8 @@ bool GatherNDOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initial return true; } -bool GatherNDOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool GatherNDOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& data = *node.InputDefs()[0]; const auto& indices = *node.InputDefs()[1]; const auto& op_type = node.OpType(); diff --git a/onnxruntime/core/providers/webnn/builders/impl/gather_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/gather_op_builder.cc index ae9fe3e3f3bd1..88d22f103cadc 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/gather_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/gather_op_builder.cc @@ -22,8 +22,8 @@ class GatherOpBuilder : public BaseOpBuilder { // Operator support related. bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node, const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. 
@@ -69,8 +69,8 @@ bool GatherOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initializ return true; } -bool GatherOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool GatherOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input = *node.InputDefs()[0]; const auto& indices = *node.InputDefs()[1]; const auto& op_type = node.OpType(); diff --git a/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc index 252d49a2f4d4d..5f4e6de8fda98 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc @@ -25,8 +25,8 @@ class GemmOpBuilder : public BaseOpBuilder { private: bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node, const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -215,8 +215,8 @@ bool GemmOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initializer return true; } -bool GemmOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool GemmOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input0_type; // A data type diff --git a/onnxruntime/core/providers/webnn/builders/impl/gru_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/gru_op_builder.cc index ffb9b7fbf2e7a..b240e30d38b22 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/gru_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/gru_op_builder.cc @@ -26,8 +26,8 @@ class GruOpBuilder : public BaseOpBuilder { private: bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const WebnnDeviceType /*device_type*/, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; bool HasSupportedOutputsImpl(const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; @@ -187,8 +187,8 @@ bool GruOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, c return true; } -bool GruOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool GruOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& 
op_type = node.OpType(); int32_t input_X_type = 0; // input data type diff --git a/onnxruntime/core/providers/webnn/builders/impl/logical_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/logical_op_builder.cc index d56fdbc08c677..91910f55f37c7 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/logical_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/logical_op_builder.cc @@ -21,8 +21,8 @@ class LogicalOpBuilder : public BaseOpBuilder { // Operator support related. bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node, const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -71,8 +71,8 @@ bool LogicalOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initiali return true; } -bool LogicalOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool LogicalOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input0_type; diff --git a/onnxruntime/core/providers/webnn/builders/impl/lstm_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/lstm_op_builder.cc index 6213b039fb2f9..33ba22ac3fb5b 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/lstm_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/lstm_op_builder.cc @@ -25,8 +25,8 @@ class LstmOpBuilder : public BaseOpBuilder { private: bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const WebnnDeviceType /*device_type*/, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; bool HasSupportedOutputsImpl(const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; @@ -198,8 +198,8 @@ bool LstmOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, return true; } -bool LstmOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool LstmOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input0_type = 0; // input data type diff --git a/onnxruntime/core/providers/webnn/builders/impl/max_min_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/max_min_op_builder.cc index e111ca412c6e9..40f94186e9ed6 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/max_min_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/max_min_op_builder.cc @@ -22,8 +22,8 @@ class MaxMinOpBuilder : public 
BaseOpBuilder { // Operator support related. bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -87,8 +87,8 @@ bool MaxMinOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initializ return true; } -bool MaxMinOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool MaxMinOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input0_type; diff --git a/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc index 79ed0393e3044..50e49884bdfa9 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc @@ -25,8 +25,8 @@ class NormalizationOpBuilder : public BaseOpBuilder { private: bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, @@ -228,7 +228,8 @@ bool NormalizationOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initi return true; } -bool NormalizationOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, +bool NormalizationOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); diff --git a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc index ca15e123d0999..b71507a871bf6 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc @@ -22,8 +22,8 @@ class QDQOpBuilder : public BaseOpBuilder { const logging::Logger& logger) const override ORT_MUST_USE_RESULT; // Operator support related. 
- bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; Status QDQOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, @@ -118,8 +118,8 @@ Status QDQOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, return Status::OK(); } -bool QDQOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool QDQOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input0_type = 0; // input data type diff --git a/onnxruntime/core/providers/webnn/builders/impl/scatterElements_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/scatterElements_op_builder.cc index c786aa468736c..8c70525835059 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/scatterElements_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/scatterElements_op_builder.cc @@ -22,8 +22,8 @@ class ScatterElementsOpBuilder : public BaseOpBuilder { // Operator support related. bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node, const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -65,7 +65,8 @@ bool ScatterElementsOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* return true; } -bool ScatterElementsOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, +bool ScatterElementsOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& data = *node.InputDefs()[0]; const auto& indices = *node.InputDefs()[1]; diff --git a/onnxruntime/core/providers/webnn/builders/impl/scatterND_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/scatterND_op_builder.cc index feb93cc14b7c4..8089b9706886f 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/scatterND_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/scatterND_op_builder.cc @@ -22,8 +22,8 @@ class ScatterNDOpBuilder : public BaseOpBuilder { // Operator support related. bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node, const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. 
@@ -57,7 +57,8 @@ bool ScatterNDOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initia return true; } -bool ScatterNDOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, +bool ScatterNDOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& data = *node.InputDefs()[0]; const auto& indices = *node.InputDefs()[1]; diff --git a/onnxruntime/core/providers/webnn/builders/impl/slice_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/slice_op_builder.cc index d51297f19f1c2..41c66038c2694 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/slice_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/slice_op_builder.cc @@ -27,6 +27,8 @@ class SliceOpBuilder : public BaseOpBuilder { const logging::Logger& logger) const override ORT_MUST_USE_RESULT; bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node, const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& initializers, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; // TODO: Support Slice opset < 10, which uses attributes for starts and ends. int GetMinSupportedOpSet(const Node& /* node */) const override { return 10; } }; @@ -161,6 +163,30 @@ bool SliceOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers, return true; } +bool SliceOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& initializers, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { + const auto& input_defs = node.InputDefs(); + const auto& input = *input_defs[0]; + const auto& op_type = node.OpType(); + int32_t input_type; + if (!GetType(input, input_type, logger)) + return false; + + // If there is step < 0, check data type support of reverse. 
+ if (input_defs.size() > 4 && input_defs[4]->Exists()) { + std::vector steps; + if (!ReadIntArrayFrom1DTensor(*initializers.at(input_defs[4]->Name()), steps, logger)) + return false; + if (std::any_of(steps.begin(), steps.end(), [](int64_t step) { return step < 0; })) { + if (!IsDataTypeSupportedByWebNNOp(op_type, "reverse", input_type, wnn_limits, "input", "data", logger)) { + return false; + } + } + } + + return IsDataTypeSupportedByOp(op_type, input_type, wnn_limits, "input", "data", logger); +} + void CreateSliceOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { op_registrations.builders.push_back(std::make_unique()); op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get()); diff --git a/onnxruntime/core/providers/webnn/builders/impl/ternary_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/ternary_op_builder.cc index 4b6cf312074ba..c7b3129c0c85b 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/ternary_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/ternary_op_builder.cc @@ -18,8 +18,8 @@ class TernaryOpBuilder : public BaseOpBuilder { private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override ORT_MUST_USE_RESULT; - bool HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const override; }; // Add operator related. @@ -46,8 +46,8 @@ Status TernaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, cons return Status::OK(); } -bool TernaryOpBuilder::HasSupportedInputsImpl(const Node& node, const emscripten::val& wnn_limits, - const logging::Logger& logger) const { +bool TernaryOpBuilder::HasSupportedInputsImpl(const InitializedTensorSet& /* initializers */, const Node& node, + const emscripten::val& wnn_limits, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); const auto& op_type = node.OpType(); int32_t input0_type; // condition data type From d8de3c4096d61c70e1c7efd94b39068f44152139 Mon Sep 17 00:00:00 2001 From: amancini-N <63410090+amancini-N@users.noreply.github.com> Date: Wed, 11 Dec 2024 01:20:47 +0100 Subject: [PATCH 12/13] [CUDA EP] Fix BeamSearch on T5 with sequence_as_input_ids (#20667) (#20668) ### Description Change the implementation of BeamSearch op when using CUDA EP: in case of T5 model, and in case the decoder input_ids are sequences, copy the sequences device-to-device instead of host-to-device ### Motivation and Context - Fixes #20667 --- .../cpu/transformers/beam_search_impl_t5.h | 26 ++++++--- .../cpu/transformers/generation_shared.h | 1 + .../contrib_ops/cpu/transformers/sequences.cc | 4 ++ .../contrib_ops/cpu/transformers/sequences.h | 3 + .../cpu/transformers/subgraph_t5_decoder.cc | 52 ++++++++++++------ .../cpu/transformers/subgraph_t5_decoder.h | 3 +- .../transformers/generation_device_helper.cc | 18 +++--- .../test/contrib_ops/beam_search_test.cc | 14 +++++ .../dummy_t5_with_sequence_input_ids.onnx | Bin 0 -> 6754 bytes 9 files changed, 84 insertions(+), 37 deletions(-) create mode 100644 onnxruntime/test/testdata/dummy_t5_with_sequence_input_ids.onnx diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h index 
8f5cdc97f27e5..b67d003eaceeb 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h +++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h @@ -258,7 +258,8 @@ Status BeamSearchT5::Execute(const FeedsFetchesManager& encoder_feeds_fetches current_length, cpu_state.sequences, parameters->max_length, - decoder_subgraph_.has_decoder_masked_attention_)); + decoder_subgraph_.has_decoder_masked_attention_, + this->cuda_device_prop_ != nullptr)); if (decoder_subgraph_.past_present_share_buffer_) { decoder_fetches.reserve(static_cast(decoder_subgraph_.GetFirstPresentOutputIndex()) + @@ -302,17 +303,24 @@ Status BeamSearchT5::Execute(const FeedsFetchesManager& encoder_feeds_fetches auto cur_len = std::to_string(current_length); dumper->Print("***CurrentLength", cur_len, true); - for (int i = 0; i <= decoder_subgraph_.GetFirstPastInputIndex(); i++) { + for (int i = 0; i < decoder_subgraph_.GetFirstPastInputIndex(); i++) { dumper->Print("decoder_feeds", i, true); dumper->Print("", decoder_feeds[i]); } - auto offset = decoder_subgraph_.GetFirstPastInputIndex() + 4 * decoder_subgraph_.num_layers; - dumper->Print("past_sequence_length", offset, true); - dumper->Print("", decoder_feeds[offset]); - dumper->Print("beam_width", offset + 1, true); - dumper->Print("", decoder_feeds[offset + 1]); - dumper->Print("cache_redir", offset + 2, true); - dumper->Print("", decoder_feeds[offset + 2]); + for (int i = 0; i < decoder_subgraph_.num_layers; i++) { + int self_key_idx = decoder_subgraph_.GetFirstPastInputIndex() + 2 * i; + int self_value_idx = self_key_idx + 1; + dumper->Print("past_key_self", i, true); + dumper->Print("", decoder_feeds[self_key_idx]); + dumper->Print("past_value_self", i + 1, true); + dumper->Print("", decoder_feeds[self_value_idx]); + int cross_key_idx = decoder_subgraph_.GetFirstPastInputIndex() + 2 * decoder_subgraph_.num_layers + 2 * i; + int cross_value_idx = cross_key_idx + 1; + dumper->Print("past_key_cross", i, true); + dumper->Print("", decoder_feeds[cross_key_idx]); + dumper->Print("past_value_cross", i, true); + dumper->Print("", decoder_feeds[cross_value_idx]); + } #endif #ifdef DEBUG_NODE_INPUTS_OUTPUTS diff --git a/onnxruntime/contrib_ops/cpu/transformers/generation_shared.h b/onnxruntime/contrib_ops/cpu/transformers/generation_shared.h index 30bf3aa0a1212..8145fbd4a4123 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/generation_shared.h +++ b/onnxruntime/contrib_ops/cpu/transformers/generation_shared.h @@ -100,6 +100,7 @@ struct ISequences { virtual gsl::span GetCurrentDeviceSequences() const = 0; // Get all current beam_index sequences in one continuous block (to pass to CUDA) virtual gsl::span GetNextDeviceSequences() = 0; // Get all next beam_index sequences in one continuous block (to pass to CUDA) virtual int GetSequenceLength() const = 0; + virtual int GetMaxLength() const = 0; }; struct ILogitsProcessorList { diff --git a/onnxruntime/contrib_ops/cpu/transformers/sequences.cc b/onnxruntime/contrib_ops/cpu/transformers/sequences.cc index 723c271897a78..ecad146da6777 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/sequences.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/sequences.cc @@ -36,6 +36,10 @@ int Sequences::GetSequenceLength() const { return current_length_; } +int Sequences::GetMaxLength() const { + return max_length_; +} + #ifdef DEBUG_GENERATION void Sequences::PrintSequences(const IConsoleDumper* dumper) const { for (int i = 0; i < batch_beam_size_; i++) { diff --git 
a/onnxruntime/contrib_ops/cpu/transformers/sequences.h b/onnxruntime/contrib_ops/cpu/transformers/sequences.h index 440a07e14a6cc..7dd1f28d270c7 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/sequences.h +++ b/onnxruntime/contrib_ops/cpu/transformers/sequences.h @@ -25,6 +25,9 @@ class Sequences : public ISequences { // Returns current sequence length. int GetSequenceLength() const override; + // Returns max sequence length. + int GetMaxLength() const override; + #ifdef DEBUG_GENERATION // Print the sequences to StdOut in debug mode void PrintSequences(const IConsoleDumper* dumper) const; diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc index 6c66bfc2816e4..f4e7173c917c1 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.cc @@ -156,7 +156,8 @@ Status T5DecoderSubgraph::CreateInitialFeeds( int cur_len, transformers::Sequences& sequences, int past_present_share_buffer_max_seq_len, - bool need_cache_indir) { + bool need_cache_indir, + bool use_cuda) { ORT_ENFORCE(session_state_ != nullptr, "Setup must be called before CreateInitialFeeds"); // Allocate subgraph inputs from same device as inputs of encoder subgraph. @@ -171,8 +172,9 @@ Status T5DecoderSubgraph::CreateInitialFeeds( Tensor::InitOrtValue(DataTypeImpl::GetType(), input_ids_shape, allocator, input_ids); int32_t* input_ids_data = input_ids.GetMutable()->MutableData(); AllocatorPtr buffer_allocator = std::make_shared(); - size_t total_size = static_cast(static_cast(cur_len) * batch_beam_size * sizeof(int)); - auto seq_copy = IAllocator::MakeUniquePtr(buffer_allocator, total_size, false, stream); + size_t total_size = static_cast(cur_len) * static_cast(batch_beam_size); + size_t total_size_bytes = total_size * sizeof(int); + auto seq_copy = IAllocator::MakeUniquePtr(buffer_allocator, total_size_bytes, false, stream); int* seq_copy_ptr = seq_copy.get(); if (!use_sequence_as_input_ids_) { @@ -182,19 +184,35 @@ Status T5DecoderSubgraph::CreateInitialFeeds( stream, DeviceCopyDirection::hostToDevice)); } else { - for (int i = 0; i < batch_beam_size; i++) { - gsl::span sequence = sequences.GetSequence(i); - const int32_t* sequence_data = sequence.data(); - long long seq_index = (long long)i * cur_len; - memcpy(seq_copy_ptr + seq_index, sequence_data, total_size); + if (use_cuda) { + auto sequences_buffer = sequences.GetCurrentDeviceSequences(); + for (int i = 0; i < batch_beam_size; i++) { + size_t batch_beam_stride = static_cast(i) * static_cast(sequences.GetMaxLength()); + int seq_size = sequences.GetSequenceLength(); + gsl::span sequence = sequences_buffer.subspan(batch_beam_stride, seq_size); + gsl::span temp_input(input_ids_data + static_cast(i) * seq_size, seq_size); + ORT_RETURN_IF_ERROR(device_copy_int32_func( + temp_input, + sequence, + stream, + DeviceCopyDirection::deviceToDevice)); + } + } else { + const size_t cur_len_bytes = cur_len * sizeof(int); + for (int i = 0; i < batch_beam_size; i++) { + gsl::span sequence = sequences.GetSequence(i); + const int32_t* sequence_data = sequence.data(); + ptrdiff_t seq_index = static_cast(i) * cur_len; + memcpy(seq_copy_ptr + seq_index, sequence_data, cur_len_bytes); + } + gsl::span temp_input(input_ids_data, total_size); + gsl::span temp_sequence(seq_copy_ptr, total_size); + ORT_RETURN_IF_ERROR(device_copy_int32_func( + temp_input, + temp_sequence, + stream, + DeviceCopyDirection::hostToDevice)); } - 
gsl::span temp_input(input_ids_data, total_size); - gsl::span temp_sequence(seq_copy_ptr, total_size); - ORT_RETURN_IF_ERROR(device_copy_int32_func( - temp_input, - temp_sequence, - stream, - DeviceCopyDirection::hostToDevice)); } // The ordering is the same as used in Setup. @@ -230,7 +248,7 @@ Status T5DecoderSubgraph::CreateInitialFeeds( num_beam, allocator, expanded_hidden_states, - true, + false, 0 /*max_sequence_length*/)); } else { ORT_RETURN_IF_ERROR(expand_buffer_float_func(stream, @@ -238,7 +256,7 @@ Status T5DecoderSubgraph::CreateInitialFeeds( num_beam, allocator, expanded_hidden_states, - true, + false, 0 /*max_sequence_length*/)); } decoder_feeds.push_back(expanded_hidden_states); diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.h b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.h index 83dae49c7dcbd..a72ce37a93aba 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.h +++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_t5_decoder.h @@ -48,7 +48,8 @@ class T5DecoderSubgraph : public Subgraph { int cur_len, transformers::Sequences& sequences, int past_present_share_buffer_max_seq_len = -1, - bool need_cache_indir = false); + bool need_cache_indir = false, + bool use_cuda = false); Status Validate(const std::vector& subgraph_inputs, const std::vector& subgraph_outputs) override; diff --git a/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.cc b/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.cc index e047bd948434d..4e65336665bf7 100644 --- a/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.cc +++ b/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.cc @@ -1264,16 +1264,14 @@ Status UpdateDecoderFeeds( CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(input_ids_data, beam_next_tokens.data(), beam_next_tokens.size_bytes(), cudaMemcpyHostToDevice, cuda_stream)); } else { - for (int i = 0; i < batch_beam_size; i++) { - gsl::span sequence = sequences.GetSequence(i); - const int32_t* sequence_data = sequence.data(); - CUDA_RETURN_IF_ERROR( - cudaMemcpyAsync(input_ids_data + static_cast(i) * current_length, - sequence_data, - current_length * sizeof(int32_t), - cudaMemcpyHostToDevice, - cuda_stream)); - } + // We expect sequences to point directly to device memory + int max_length = sequences.GetMaxLength(); + auto sequences_buffer = sequences.GetCurrentDeviceSequences(); + CUDA_RETURN_IF_ERROR( + cudaMemcpy2DAsync(input_ids_data, current_length * sizeof(int32_t), + sequences_buffer.data(), max_length * sizeof(int32_t), + current_length * sizeof(int32_t), batch_beam_size, + cudaMemcpyDeviceToDevice, cuda_stream)); } next_inputs[0] = input_ids; diff --git a/onnxruntime/test/contrib_ops/beam_search_test.cc b/onnxruntime/test/contrib_ops/beam_search_test.cc index ca600c0700682..8c69e2d9810b8 100644 --- a/onnxruntime/test/contrib_ops/beam_search_test.cc +++ b/onnxruntime/test/contrib_ops/beam_search_test.cc @@ -424,5 +424,19 @@ TEST(BeamSearchTest, DummyT5WithOuterScopeInitializers) { tester.RunWithConfig(); } +TEST(BeamSearchTest, DummyT5WithSequenceInputIds) { +#if defined(USE_CUDA) && defined(USE_DML) + SKIP_CUDA_TEST_WITH_DML; +#endif + ModelTester tester(CurrentTestName(), ORT_TSTR("testdata/dummy_t5_with_sequence_input_ids.onnx")); + tester.ConfigEp(DefaultCpuExecutionProvider()); + tester.AddInput("encoder_input_ids", {1, 5}, {16, 17, 1, 0, 8}); + tester.AddOutput("sequences", {1, 3, 10}, {2, 19, 18, 3, 8, 8, 8, 8, 8, 8, 2, 19, 18, 3, 10, 19, 18, 3, 8, 8, 2, 19, 
18, 15, 13, 13, 13, 13, 13, 13}); +#ifdef USE_CUDA + tester.ConfigEp(DefaultCudaExecutionProvider()); +#endif + tester.RunWithConfig(); +} + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/testdata/dummy_t5_with_sequence_input_ids.onnx b/onnxruntime/test/testdata/dummy_t5_with_sequence_input_ids.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5a5c302914890e5f1c381eae3676c985e85cc0c6 GIT binary patch literal 6754 zcmds6d0bOR*M_i#D+pXxH$;4EAu1w*2$DGgcLWy%t5{3}2?9a_Nzfv#qT+%Z6$PuJ zEUs8;QPEZ*bE0Auwc=K_E|gZO`%)EAtM$EX6$SKt`}U9T_vN3Pd+(WN&YXMB%=66f z8SgT8CdZAjkjWzyQkha5EssxBi=(9~##AknizO;CcTp*njHyB{Yj$g$a@1N|unwbtqG@hN=jKQl@fb{A7~2aWaWAB1&-AmNAmb>fLdx zv}FjxHr|P4ap5wlG+I7I#TftPC~MvHIx1Q!<$PADC29`RaF|3LB~vm1jCIrC-Y{T0 zw)$8cCzHq>nIM@oF+vu|{TH-j^d+ClR3hD6T_YxtX}vq<9cZj&Q%_BvsuJTI^?apL z#;XlD*0jkqT%wMMax@qhC5e{_7)DPfm)dcj@R$VVO&i8pC5w%07`-@L7Rkj`tX4|o z@d}kptg34+mMg^daIC}oOdSmeDdZ6nb;IDJRdzacUG`;K8wtZiMsuH79IsTwu!gaU zDbZ>!`hgO4U}7xuaqCUq>j2xaZ5JJlf@Jjr;=aZ^%vcK*mnyN`;OpWtRhA@nbuj#NLQp+_i;zR6V9?Z+%!pSqU^nZfs9$aM5S)#0n^ecmW}61 zKqgn$`&jR+Ym4~v>f%3+v3L#F=mFE3TdY+RWP{Z^K-s2R6Rb1fTk-kkx_+(Hn}3SI zhRd_)9jzP-&X=By*?eUofg)3tic_c zY>ns4a9lXOOc>lb2%S% zVpJ;47JzBLfKuF*2YFsU!h@1b+!c@m36ppFXf87FT&a&vrkn8R?sMq=M_Sx@drst2ZaoeWF-Ea3Kl9GYEYA$)Q^ zS<9>K459APLUX7U9@m>olYR3vYeUxH@c2?;)>9pbbnXrEeFdbSPClu6_6ONM$_LhY z+#<(x4pR3{KTz`#M`?!LN&4N^&+yliF_>?*oWvB&Be7d|({1)!$)$7`*m`XfIeRMw zt;2Lh-X3X~z4AO{Z~u+f^fJb<9tPUat&V}v=yz$R7&eZw^YWDry4=(VYl9>;Np>c#t z_$f1z9+^=>Om9s>lk^<$U$C0&sa!(?LUo8>##AcEG~o03R$P8In{T+hjN2oiBy2Wz zw*8*Woo6AkC|(MMUHZ{3;73<&PJyhAQRKG%eDHMXfXbsE!ymd|g1h@tw0JZWj|?#r z1qVlw8=H(p)AQ`L#nDS(t7|OjyK5QIn+4GBkP9*CxC3sTs1}BpZGvCZlCZXX06Z(} zL;F|0#7;3zaF6dHGE`O5g2=hxRy2zIl;w^+M;*cqA#32i)<7$%@r02Xw=tmS1=8+p z2ySuc2Xk!#P`kPvZZ5AxW&2!2<2Ay48;Wt&JSU>)aTr&wmsm{C{ca)IYQuupEd@E&x2MqY^IYgzuv>GZ zj~*tP-4=G+{}KBBmW;aPuIN-P(9U#Rik%&b;AQS)=ox4Z6=QBv*Rx@8YSBtaxK~NP z@U+0Ym(##r-W_5NOd<=-R^kuawn5RuM|AS(VRW;1Uy)(JDm1@hO#S*5mad(!3@qoU zfaE{*dBkCFH|Ymgx}}3s=1+|a9iUhH{d8rv9Tt?XFP&y@sP!D=OXU$>S6_d-SB*cOPQy>X@ftd?Ij?bHdhrce zC&q+j>Q`;CDtd;Dv14r-r~$XAG_WXf1JMc{&e$~Z42ha!ZR%)+TpTA+O=a6eYOrsC zG_ozqQehR5f%Jvt6bKXha(>)+o7^JmQK z7t;u(LZxb;2k*vebBy|-0Swy=t8r!iyJ+D)Oe2e8+l+VXDZmBEDm<9;r3Ig03FE_s zd_XH9cVO+C?At`u4OcMSb1WcI7%jws>Bkxw7#5QOM?auT~BLUkiUBJ-3;~(9rK!CH-lj9 zo1*cCt(Z%UIwc@|O`*pz?=fX9o6@6>LN`>)`-pVNdqJ(6KpTqXKZ9-KBR5H<{~wa@ z|D4Dg^-fjP%4G8%l{My_c&$z|??z;sCW2-XrYZejZy9h@ajv=$Q*t`M&M+Uk{p3vW zyJ7^)sezE3by;{uEQS1{fw;qQA30mJkH$?3qKA{JsNP;ne7VsbD-)a$M;6n?jzdZJ zsip9H)D3zqZ5c_#A#@&e1X9ap(n+ThF>6f)tQgWx zl;!I~`VYK{{fvf_bDOr&;;I28_}m7nR`5kSv+j{&-Wt>_+(g3X42K1KKBS@;ClUQ> zsir2+km!~r;s^ZIWXSIWaKYId`0C3Pm~8tQ^*i-a_$2)Rsky!g2Re<%yRK6)?4A=G zd~8BBff@+3nhZq`Sg?9XVACEq?Bx@HCr9LIto$cH*iL;=_C;dgXN*q+yf+Detgbya(2Q~4M8d|98gnKS70=?-W z_$j$WFFpSRJ&Xb{CF>G7qbY*iL-Xm~OG$Y3lUKODA_rHy`crwu7u4ui1TXtyN*bVwpSv-`VKLX#lPND88a`Q;eg2~VRq=G|b!k?;U!}azHa7+4H zG}(FxUd&D*yQn3Y9!rCNSxuq0&u*p3HD99dw<(acek0y4oq)Po;ppGvTk_(jKK#ow z3Hz75Bxh%;i9o>Taa6IDb->^JZ7$fB+_}w;!A&6_YR|3fjbS%Z;eIh6U!Wl!?63`U zDnj6w+mYnR$J>Cpv%))2=OL|KZ9_7Hd%+Q{H_^Kl31POsK~UN@@XR_!syr5uSrg0f z!Q?{Nc0H@KytWX#I%g4wYcB9)#aem*Y@kDBjiyUUS5aKr2O_N90t+Vzi1O1+I1qb+ z+Rd9wA9=l?USr2&nr#Z|l?;VH0~f>3S3bd-UB;rk_)++!+dQ-rPr^B;?0w35bjE~- zduefS2`w(UOQP00(17#L>4TBWaZr{X=50Iyfgk09?c5^Lb^cDhev zaY`lJ%nJgSqC0pqwuE@P@5QGBcEgQzqcxi!wa10!3$SE@nMiwN35G_j#-bTJaGG~9 z*?aE<{d)a6e4aP~Z;CEJ;N3DP$WPK_pVrg#nU@b;%{Ie@Awe)<{sD4o!)Rim9F5

evRX(`*+tuU?2_NRR&j>25C zfv`@yiWWqvY4uDq*yj-jQoqHx?6Cy$stTZc5+m}?xCoBbyM59e7Lom#qj7jn3T%zv zr1A9A1Ho{w(5=%`;RD(r_3v*P$h5`^|F+^8&9u%3|1Myi ztgvKEWD4&1;terj!7y>$IY+EmofI#t-$$>riu9MWQq3CF zHxk&I8~VL@EaI=vaM|7P;*A3mfBm|&7j)oSynR&Sudi44%8}d2YV(Z5U;l^WOW+@= zkZ@p(o3;v6^@`H4g@Wht_1c!+;f_q>F%4T+4Z5vVgPtE32XhNQ3+Bzhnd|-=8-F7c literal 0 HcmV?d00001 From 8800830a446e989af7420f293f37dd18f455ba57 Mon Sep 17 00:00:00 2001 From: sushraja-msft <44513542+sushraja-msft@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:07:11 -0800 Subject: [PATCH 13/13] Implement 2d tiled matmulnbits specialized for prefill (#23058) ### Description This change implements matmul4bits with tiling both for A and B. This is beneficial for prefill scenarios on Intel integrated GPUs, because each row of A has to run through the same set of shared rows of B. This change should improve core occupancy and model_benchmark does indicate improvements for prefill. The same shader is not used for generation because when A has just a single row, the other threads in the workgroup get unused and that hurts performance. ``` -- Baseline run on an Alderlake GPU -- C:\onnxruntime>C:\model_benchmark\model_benchmark.exe -i C:\Phi-3.5-mini-instruct-onnx-web\Phi-3.5-mini-instruct-onnx-web -l 500 Batch size: 1, prompt tokens: 501, tokens to generate: 128 Prompt processing (time to first token): avg (us): 1.72338e+07 avg (tokens/s): 29.0707 << p50 (us): 1.72548e+07 stddev (us): 57012.8 n: 5 * 501 token(s) Token generation: avg (us): 79227.5 avg (tokens/s): 12.6219 p50 (us): 79284.4 stddev (us): 2109.72 n: 635 * 1 token(s) Token sampling: avg (us): 15.8198 avg (tokens/s): 63211.8 p50 (us): 14.3 stddev (us): 8.67178 n: 640 * 1 token(s) E2E generation (entire generation loop): avg (ms): 27297.8 p50 (ms): 27269.8 stddev (ms): 89.4322 n: 5 Peak working set size (bytes): 5490987008 WebGPU device lost (2): Device was destroyed. ----------------------------------- With Prefill Optimization ---- C:\onnxruntime>C:\model_benchmark\model_benchmark.exe -i C:\Phi-3.5-mini-instruct-onnx-web\Phi-3.5-mini-instruct-onnx-web -l 500 Batch size: 1, prompt tokens: 501, tokens to generate: 128 Prompt processing (time to first token): avg (us): 1.2135e+07 avg (tokens/s): 41.2856 << p50 (us): 1.21288e+07 stddev (us): 21282.1 n: 5 * 501 token(s) Token generation: avg (us): 78945.3 avg (tokens/s): 12.667 p50 (us): 78900.7 stddev (us): 2232.43 n: 635 * 1 token(s) Token sampling: avg (us): 20.5608 avg (tokens/s): 48636.3 p50 (us): 18.7 stddev (us): 19.0409 n: 640 * 1 token(s) E2E generation (entire generation loop): avg (ms): 22163.8 p50 (ms): 22160.1 stddev (ms): 31.3122 n: 5 Peak working set size (bytes): 5478862848 WebGPU device lost (2): Device was destroyed. 
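```

To make the diff below easier to follow: the new shader stages 16 rows of A and 16 rows of dequantized B (64 elements of the shared K dimension per pass) in workgroup memory and reuses them for all 16 x 16 outputs of a tile, which is where the prefill win comes from. The plain C++ reference below is an illustration only, not code from this change; the tile and step sizes are assumptions chosen to mirror the shader's TILE_SIZE and BLOCKS_PER_CYCLE times the 32-element quantization block, and dequantization is omitted.

```cpp
#include <algorithm>
#include <array>
#include <cstddef>

constexpr std::size_t kTile = 16;  // assumed to mirror the shader's TILE_SIZE
constexpr std::size_t kStep = 64;  // two 32-element quantization blocks per pass (BLOCKS_PER_CYCLE)

// Reference only: C[M x N] = A[M x K] * B^T, with B stored row-major as [N x K]
// (already transposed, as in MatMulNBits). Dequantization and vec4 packing omitted.
void TiledMatMulRef(const float* A, const float* Bt, float* C,
                    std::size_t M, std::size_t N, std::size_t K) {
  for (std::size_t m0 = 0; m0 < M; m0 += kTile) {
    for (std::size_t n0 = 0; n0 < N; n0 += kTile) {
      std::array<std::array<float, kTile>, kTile> tile_O{};  // per-tile accumulators
      for (std::size_t k0 = 0; k0 < K; k0 += kStep) {
        const std::size_t kc = std::min(kStep, K - k0);
        // Stage slabs of A and B once per K-step (the shader's tile_A / tile_B).
        std::array<std::array<float, kStep>, kTile> tile_A{}, tile_B{};
        for (std::size_t i = 0; i < kTile; ++i) {
          for (std::size_t k = 0; k < kc; ++k) {
            if (m0 + i < M) tile_A[i][k] = A[(m0 + i) * K + k0 + k];
            if (n0 + i < N) tile_B[i][k] = Bt[(n0 + i) * K + k0 + k];
          }
        }
        // Each staged row of A is reused against every staged row of B; with M == 1
        // (token generation) only one row of tile_A does useful work, which is why a
        // separate shader is kept for that case.
        for (std::size_t i = 0; i < kTile; ++i)
          for (std::size_t j = 0; j < kTile; ++j)
            for (std::size_t k = 0; k < kc; ++k)
              tile_O[i][j] += tile_A[i][k] * tile_B[j][k];
      }
      for (std::size_t i = 0; i < kTile && m0 + i < M; ++i)
        for (std::size_t j = 0; j < kTile && n0 + j < N; ++j)
          C[(m0 + i) * N + n0 + j] = tile_O[i][j];
    }
  }
}
```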
``` --- .../webgpu/quantization/matmul_nbits.cc | 201 +++++++++++++++--- .../webgpu/quantization/matmul_nbits.h | 14 ++ 2 files changed, 186 insertions(+), 29 deletions(-) diff --git a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc index 31f95ee64df5d..be18f820e2747 100644 --- a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc +++ b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc @@ -39,6 +39,7 @@ std::string QuantizedDataType(int components) { } } +constexpr unsigned int kMinSequenceLengthForPrefillOptimization = 16; } // namespace ONNX_OPERATOR_KERNEL_EX( @@ -321,6 +322,121 @@ Status MatMulNBitsProgram::GenerateShaderCode(ShaderHelper& shader) const { return Status::OK(); } +Status MatMulNBitsProgramPrefill::GenerateShaderCode(ShaderHelper& shader) const { + shader.AddInput("input_a", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias | ShaderUsage::UseValueTypeAlias); + shader.AddInput("input_b", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias | ShaderUsage::UseValueTypeAlias); + shader.AddInput("scales", ShaderUsage::UseUniform); + shader.AddOutput("output", ShaderUsage::UseUniform | ShaderUsage::UseValueTypeAlias | ShaderUsage::UseElementTypeAlias | ShaderUsage::UseIndicesTypeAlias); + // This shader uses uniforms with the M,N,K convention from traditional matrix multiplicatiion + // M is the number of rows in A and M rows in the output. + // N is the number of columns in B and N columns in the output. + // K is the hidden/shared dimension number of columns in A and K rows in B. + // Note in matmulnbits, B matrix is already transposed, however the following remains true + // for the shader below M describes A, N describes B and K is the hidden/shared dimension. + // K4/K8 are simply K divided by 4 or 8 respectively. + shader.AdditionalImplementation() << R"INIT_SECTION( +// Matrix dimensions and quantization parameters +const TILE_SIZE : u32 = 16u; +const VALUES_PER_VEC4 : u32 = 4u; +const QUANTIZATION_BLOCK_SIZE : u32 = 32; +// We want INNER_DIMENSION_ITEMS_PER_CYCLE to be the number of lanes in an EU/SM, +// so we use BLOCKS_PER_CYCLE as 2u, or process weights 2 blocks at a time. +// This uses all 16 lanes on 12th gen intel chips. +const BLOCKS_PER_CYCLE : u32 = 2u; +const INNER_DIMENSION_ITEMS_PER_CYCLE : u32 = 16u; // (QUANTIZATION_BLOCK_SIZE/VALUES_PER_VEC4)*BLOCKS_PER_CYCLE +const VECTORIZED_QUANTIZATION_BLOCK_SIZE: u32 = 8u; // QUANTIZATION_BLOCK_SIZE / VALUES_PER_VEC4; + +//Shared memory +var tile_A : array, TILE_SIZE>; +var tile_B : array, TILE_SIZE>; +var tile_O : array, TILE_SIZE>; + +fn loadA(slot: u32, a_global : u32, step_idx : u32, parallel_id : u32) +{ + if (a_global >= uniforms.M) { + return; + } + let local_A = input_a[a_global*uniforms.K4+step_idx*INNER_DIMENSION_ITEMS_PER_CYCLE+parallel_id]; + tile_A[slot][parallel_id] = local_A; +} + +fn getBScale(slot: u32, b_global : u32, vec_step_idx : u32, scale_idx: u32) -> output_value_t +{ + // Since scales are output_value_t holding 1 for every 32 values, vec_step_idx jumps over 64 weights at + // a time or 2 scales at every step. 
+ let scale_offset = vec_step_idx*2; + let idx = u32(b_global*(uniforms.K/QUANTIZATION_BLOCK_SIZE)+scale_offset); + return scales[idx+scale_idx]; +} + +fn loadB(slot: u32, b_global : u32, vec_step_idx : u32, parallel_id : u32) +{ + if (b_global >= uniforms.N) { + return; + } + let scale = getBScale(slot, b_global, vec_step_idx, u32(parallel_id/VECTORIZED_QUANTIZATION_BLOCK_SIZE)); + let idx:u32 = parallel_id; + if (idx % 2 == 0) + { + // Weights are u32 holding 8 values each, each step (vec_step_idx) jumps over 64 weights at a time. + // Therefore the weight_offset begin for the current step would be vec_step_idx * 64 if weight + // elements were holding one element each. For the case of each element holding 8 values, begin + // would become vec_step_idx * 64/8 or vec_step_idx * 8. + var weight_offset:u32 = (vec_step_idx*8)+ u32(idx/2); + let b_value = input_b[b_global*uniforms.K8+weight_offset]; + let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu); + let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu); + tile_B[slot][idx].x = (output_value_t(b_value_lower[0]) - 8.0) * scale; + tile_B[slot][idx].y = (output_value_t(b_value_upper[0]) - 8.0) * scale; + tile_B[slot][idx].z = (output_value_t(b_value_lower[1]) - 8.0) * scale; + tile_B[slot][idx].w = (output_value_t(b_value_upper[1]) - 8.0) * scale; + tile_B[slot][idx+1].x = (output_value_t(b_value_lower[2]) - 8.0)* scale; + tile_B[slot][idx+1].y = (output_value_t(b_value_upper[2]) - 8.0)* scale; + tile_B[slot][idx+1].z = (output_value_t(b_value_lower[3]) - 8.0)* scale; + tile_B[slot][idx+1].w = (output_value_t(b_value_upper[3]) - 8.0)* scale; + } +} + +fn computeDotProduct(slot_a: u32, slot_b:u32) -> output_value_t +{ + var sum:output_value_t = 0; + for (var idx:u32 = 0 ; idx < INNER_DIMENSION_ITEMS_PER_CYCLE; idx++) + { + sum += dot(tile_A[slot_a][idx], tile_B[slot_b][idx]); + } + return sum; +} +)INIT_SECTION"; + + shader.MainFunctionBody() << R"MAIN_FN( + // Indexing with idx,idy instead of using a 2d dispatch of TILE_SIZE, TILE_SIZE + // appears to give a performance win on Intel Gen12LP architecture. + // This is likley because of locality of memory access, idy below in this approach + // is the same as subgroup_id or lane id, while idx is the wave_id. + // The work distribution therefore keeps memory accesses close together in + // a single wave in this approach of indexing. 
+ let idx = u32(local_idx / TILE_SIZE); + let idy = u32(local_idx % TILE_SIZE); + let a_global_base = workgroup_id.x * TILE_SIZE; + let b_global_base = workgroup_id.y * TILE_SIZE; + let step_count:u32 = u32(uniforms.K/(BLOCKS_PER_CYCLE*QUANTIZATION_BLOCK_SIZE)); + for (var vec_step:u32 = 0; vec_step < step_count; vec_step++) + { + workgroupBarrier(); + loadA(idx, a_global_base+idx, vec_step, idy); + loadB(idx, b_global_base+idx, vec_step, idy); + workgroupBarrier(); + let result = computeDotProduct(idx, idy); + tile_O[idx][idy]+=result; + } + workgroupBarrier(); + if (a_global_base+idx < uniforms.M && b_global_base+idy < uniforms.N) { + output[(a_global_base+idx) * uniforms.N + b_global_base + idy] = tile_O[idx][idy]; + } +)MAIN_FN"; + return Status::OK(); +} + Status MatMulNBits::ComputeInternal(onnxruntime::webgpu::ComputeContext& context) const { const Tensor* a = context.Input(0); const Tensor* b = context.Input(1); @@ -360,38 +476,65 @@ Status MatMulNBits::ComputeInternal(onnxruntime::webgpu::ComputeContext& context context.AdapterInfo().architecture == std::string_view{"gen-12lp"} && block_size == 32; const bool has_zero_points = zero_points != nullptr; - // TODO: Support output_number > 1. Some cases are failed when output_number > 1. - // const uint32_t output_number = M > 1 && (N / components) % 2 == 0 ? 2 : 1; - constexpr uint32_t output_number = 1; - MatMulNBitsProgram program{output_number, gsl::narrow(components_b), has_zero_points, use_block32}; - - if (use_block32) { - components = 1; - constexpr uint32_t workgroup_size = 128; - const uint32_t workgroup_y = N % 8 == 0 ? 8 : N % 4 == 0 ? 4 - : 1; - const uint32_t workgroup_x = workgroup_size / workgroup_y; - program.SetWorkgroupSize(workgroup_x, workgroup_y, 1); - program.SetDispatchGroupSize(data_size / components / workgroup_y); + + if (use_block32 && batch_count == 1 && + components_a == 4 && components_b == 4 && + !has_zero_points && M >= kMinSequenceLengthForPrefillOptimization) { + MatMulNBitsProgramPrefill program; + constexpr int32_t tile_size = 16; + // subgroup_size here controls how many elements of the hidden dimension we load in a cycle. + // MatMulNBitsProgramPrefill does not use any of the subgroup wgsl instructions. The subgroup + // size just helps with optimal lane usage in the shader. + constexpr int32_t subgroup_size = 16; + program.SetWorkgroupSize(tile_size * subgroup_size); + program.SetDispatchGroupSize((M + tile_size - 1) / tile_size, + (N + tile_size - 1) / tile_size, + 1); + program + .AddInputs({{a, ProgramTensorMetadataDependency::TypeAndRank, gsl::narrow(4)}, + {b, ProgramTensorMetadataDependency::TypeAndRank, gsl::narrow(4)}, + {scales, ProgramTensorMetadataDependency::None}}) + .AddUniformVariables({{static_cast(M)}, + {static_cast(N)}, + {static_cast(K)}, + {static_cast(K / 4)}, + {static_cast(K / 8)}}) + .AddOutput({y, ProgramTensorMetadataDependency::TypeAndRank, gsl::narrow(1)}); + return context.RunProgram(program); } else { - program.SetDispatchGroupSize(data_size / components / output_number); - } + // TODO: Support output_number > 1. Some cases are failed when output_number > 1. + // const uint32_t output_number = M > 1 && (N / components) % 2 == 0 ? 2 : 1; + constexpr uint32_t output_number = 1; + MatMulNBitsProgram program{output_number, gsl::narrow(components_b), has_zero_points, use_block32}; + + if (use_block32) { + components = 1; + constexpr uint32_t workgroup_size = 128; + const uint32_t workgroup_y = N % 8 == 0 ? 8 : N % 4 == 0 ? 
4 + : 1; + const uint32_t workgroup_x = workgroup_size / workgroup_y; + program.SetWorkgroupSize(workgroup_x, workgroup_y, 1); + program.SetDispatchGroupSize(data_size / components / workgroup_y); + } else { + program.SetDispatchGroupSize(data_size / components / output_number); + } + + TensorShape reshaped_a_shape{batch_count, M, K / components_a}; + TensorShape reshaped_b_shape{N, n_blocks_per_col, blob_size_in_words / components_b}; + TensorShape reshaped_y_shape{batch_count, M, N / components}; - TensorShape reshaped_a_shape{batch_count, M, K / components_a}; - TensorShape reshaped_b_shape{N, n_blocks_per_col, blob_size_in_words / components_b}; - TensorShape reshaped_y_shape{batch_count, M, N / components}; - - program - .AddInputs({{a, ProgramTensorMetadataDependency::TypeAndRank, reshaped_a_shape, gsl::narrow(components_a)}, - {b, ProgramTensorMetadataDependency::TypeAndRank, reshaped_b_shape, gsl::narrow(components_b * 4 /** b will be accessed as uint32 which includs 4 uint8. So here we need to multiply 4.*/)}, - {scales, ProgramTensorMetadataDependency::None}}) - .AddOutput({y, ProgramTensorMetadataDependency::TypeAndRank, reshaped_y_shape, gsl::narrow(components)}) - .AddUniformVariable({block_size}) - .CacheHint(std::to_string(output_number)); - if (has_zero_points) { - program.AddInput({zero_points, ProgramTensorMetadataDependency::None, {(zero_points->Shape().Size() + 3) / 4}, 4}); + program + .AddInputs({{a, ProgramTensorMetadataDependency::TypeAndRank, reshaped_a_shape, gsl::narrow(components_a)}, + {b, ProgramTensorMetadataDependency::TypeAndRank, reshaped_b_shape, gsl::narrow(components_b * 4 /** b will be accessed as uint32 which includs 4 uint8. So here we need to multiply 4.*/)}, + {scales, ProgramTensorMetadataDependency::None}}) + .AddOutput({y, ProgramTensorMetadataDependency::TypeAndRank, reshaped_y_shape, gsl::narrow(components)}) + .AddUniformVariable({block_size}) + .CacheHint(std::to_string(output_number)); + if (has_zero_points) { + program.AddInput({zero_points, ProgramTensorMetadataDependency::None, {(zero_points->Shape().Size() + 3) / 4}, 4}); + } + return context.RunProgram(program); } - return context.RunProgram(program); } } // namespace webgpu diff --git a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h index c0d6b3e6379cd..5f785c03f6a5e 100644 --- a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h +++ b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h @@ -31,6 +31,20 @@ class MatMulNBitsProgram final : public Program { bool use_block32_; }; +class MatMulNBitsProgramPrefill final : public Program { + public: + MatMulNBitsProgramPrefill() : Program{"MatMulNBitsPrefill"} { + } + + Status GenerateShaderCode(ShaderHelper& sh) const override; + WEBGPU_PROGRAM_DEFINE_UNIFORM_VARIABLES( + {"M", ProgramUniformVariableDataType::Uint32}, + {"N", ProgramUniformVariableDataType::Uint32}, + {"K", ProgramUniformVariableDataType::Uint32}, + {"K4", ProgramUniformVariableDataType::Uint32}, + {"K8", ProgramUniformVariableDataType::Uint32}); +}; + class MatMulNBits final : public WebGpuKernel { public: MatMulNBits(const OpKernelInfo& info) : WebGpuKernel(info) {