diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc index 2cae85a0a1c8..f185a80de3cb 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include <set> #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" @@ -12,6 +13,15 @@ using namespace CoreML::Specification; namespace onnxruntime { namespace coreml { +// Once all ops support FP16, we can remove this. Until then, we keep a set of ops to +// filter the supported ones. +static std::set<std::string> Float16Ops = { + "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal", + "Sigmoid", "Tanh", "Relu", "LeakyRelu", "Concat", "GridSample", "GlobalAveragePool", + "Clip", "DepthToSpace", "Resize", "Slice", "Conv", + "ConvTranspose", "GlobalMaxPool", "Gemm", "MatMul", + "AveragePool", "MaxPool", "Reshape", "Split", "Transpose"}; + namespace { // TODO, move this to shared_library bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node& node, @@ -83,8 +93,9 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputPar } /* static */ -bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& /*input_params*/, - const logging::Logger& logger) { +bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx, + [[maybe_unused]] const OpBuilderInputParams& input_params, + const logging::Logger& logger) { if (idx >= node.InputDefs().size()) { LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range"; return false; @@ -94,20 +105,33 @@ bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderIn int32_t input_type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED; - // currently only float is supported - if (!GetType(input, input_type, logger) || input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { - LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported"; + if (!GetType(input, input_type, logger)) { + LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Get Input type failed"; return false; } - return true; + // float is supported + if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + return true; + } + +// only support MLProgram for FP16 +#if defined(COREML_ENABLE_MLPROGRAM) + if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 && + Float16Ops.count(node.OpType())) { + return true; + } +#endif + + LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported"; + return false; } bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { // We only check the type of input 0 by default // specific op builder can override this - return IsInputFloat(node, 0, input_params, logger); + return IsInputDtypeSupport(node, 0, input_params, logger); } bool BaseOpBuilder::HasSupportedOpSet(const Node& node, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h index 
071008520fbd..153ae841b238 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h +++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h @@ -32,9 +32,9 @@ class BaseOpBuilder : public IOpBuilder { : allow_empty_tensor_as_input_(allow_empty_tensor_as_input) { } - // currently we only support float - static bool IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& input_params, - const logging::Logger& logger); + // currently we support float/float16 + static bool IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params, + const logging::Logger& logger); private: virtual bool IsOpSupportedImpl(const Node& /*node*/, const OpBuilderInputParams& /*input_params*/, diff --git a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc index fb8e07633621..bc1eed8c1920 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc @@ -73,7 +73,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const } else if (op_type == "Sub") { coreml_op_type = "sub"; } else if (op_type == "Div") { - // we only support fp32 currently. when we add support for integers we need to check the type and use + // we support fp32/fp16 currently. when we add support for integers we need to check the type and use // "floor_div" or "real_div" accordingly coreml_op_type = "real_div"; } else if (op_type == "Pow") { @@ -138,9 +138,9 @@ bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderIn const logging::Logger& logger) const { // Add/Sub/Mul/Div spec says inputs must be of the same type. // Pow spec says inputs can be different types. - // We only support float for all of these inputs. - if (!IsInputFloat(node, 0, input_params, logger) || - ((node.OpType() == "Pow") && !IsInputFloat(node, 1, input_params, logger))) { + // We support float/float16 for all of these inputs. 
+ if (!IsInputDtypeSupport(node, 0, input_params, logger) || + ((node.OpType() == "Pow") && !IsInputDtypeSupport(node, 1, input_params, logger))) { return false; } diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc index e02186d3aee8..d053fc5b9496 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc @@ -96,6 +96,9 @@ Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan()); break; + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: + CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan()); + break; case ONNX_NAMESPACE::TensorProto_DataType_INT32: CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan()); break; @@ -114,6 +117,11 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::spanAssign(data.begin(), data.end()); } +void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span data) { + const char* data_byte_ptr = (const char*)(data.data()); + weight.mutable_float16value()->assign(data_byte_ptr, data_byte_ptr + data.size_bytes()); +} + namespace { template void CreateCoreMLWeightConvertingDataToFloats(CoreML::Specification::WeightParams& weight, gsl::span data) { @@ -123,6 +131,15 @@ void CreateCoreMLWeightConvertingDataToFloats(CoreML::Specification::WeightParam [](T v) { return narrow(v); }); *weight.mutable_floatvalue() = std::move(weight_floats); } + +template +void CreateCoreMLWeightConvertingDataToFloat16s(CoreML::Specification::WeightParams& weight, gsl::span data) { + std::vector weight_float16s{}; + weight_float16s.reserve(data.size()); + std::transform(data.begin(), data.end(), std::back_inserter(weight_float16s), + [](T v) { return MLFloat16(narrow(v)); }); + CreateCoreMLWeight(weight, weight_float16s); +} } // namespace void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span data) { @@ -195,6 +212,13 @@ void CopyDataToTensorValue(MILSpec::TensorValue& tensor_value, gsl::span< tensor_value.mutable_floats()->mutable_values()->Add(data.begin(), data.end()); } +template <> +void CopyDataToTensorValue(MILSpec::TensorValue& tensor_value, gsl::span data) { + const char* begin = reinterpret_cast(data.data()); + const char* end = begin + (data.size() * sizeof(MLFloat16)); + tensor_value.mutable_bytes()->mutable_values()->assign(begin, end); +} + template <> void CopyDataToTensorValue(MILSpec::TensorValue& tensor_value, gsl::span data) { tensor_value.mutable_ints()->mutable_values()->Add(data.begin(), data.end()); @@ -290,6 +314,14 @@ MILSpec::Value CreateScalarTensorValue(const T& data) { // explicit specializations for types we handle so the implementation can be in the .cc file template MILSpec::Value CreateTensorValue(gsl::span data, std::optional> shape); +template MILSpec::Value CreateTensorValue(gsl::span data, + std::optional> shape); +template MILSpec::Value CreateTensorValue(gsl::span data, + std::optional> shape); +template MILSpec::Value CreateTensorValue(gsl::span data, + std::optional> shape); +template MILSpec::Value CreateTensorValue(gsl::span data, + std::optional> shape); template MILSpec::Value CreateScalarTensorValue(const float& data); template MILSpec::Value CreateScalarTensorValue(const int32_t& data); diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h 
b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h index 475ce79b0a81..f38afc0ec181 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h @@ -41,6 +41,9 @@ Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const ONN // Copy the float array to a coreml weight void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const float> data); +// Copy the MLFloat16 array to a coreml weight +void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data); + // Copy the int32_t array to a coreml weight void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int32_t> data); diff --git a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc index 7338fc18fe77..71a4fe9b1203 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc @@ -70,16 +70,17 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod } } -// This is an internal function, requires input tensor to be 2d float tensor -// TODO, add support of other data types -static Status GetTensorFloatDataTransposed(const ONNX_NAMESPACE::TensorProto& tensor, - std::vector<float>& transposed_data) { +// This is an internal function; it requires the input tensor to be a 2d float/float16 tensor +template <typename T> +static Status GetTensorDataTransposed(const ONNX_NAMESPACE::TensorProto& tensor, + std::vector<T>& transposed_data) { Initializer unpacked_tensor(tensor); - auto src_data = unpacked_tensor.DataAsSpan<float>(); + const auto src_data = unpacked_tensor.DataAsSpan<T>(); const auto& tensor_shape = tensor.dims(); auto x_t = SafeInt<size_t>(tensor_shape[0]); auto y_t = SafeInt<size_t>(tensor_shape[1]); transposed_data.resize(x_t * y_t); + for (size_t x = 0; x < x_t; x++) { for (size_t y = 0; y < y_t; y++) { transposed_data[y * x_t + x] = src_data[x * y_t + y]; @@ -121,8 +122,9 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N // B is {K, N} in ONNX spec by default, or {N, K} in Gemm if transB is true const auto K = transB ? b1 : b0; const auto N = transB ? b0 : b1; - + // the input dtype has already been validated, so it is guaranteed to exist here. 
#if defined(COREML_ENABLE_MLPROGRAM) + auto input_dtype = a.TypeAsProto()->tensor_type().elem_type(); if (model_builder.CreateMLProgram()) { using namespace CoreML::Specification::MILSpec; @@ -136,13 +138,19 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N if (transB) { AddOperationInput(*gemm_op, "weight", b.Name()); } else { - // transpose from {K, N} to {N, K} - std::vector weight_nk; std::vector weight_nk_shape = {N, K}; - ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(*b_initializer, weight_nk)); - - AddOperationInput(*gemm_op, "weight", - model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape)); + // transpose from {K, N} to {N, K} + if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + std::vector weight_nk; // use bytes to store the type-erased data, could be any data-type + ORT_RETURN_IF_ERROR(GetTensorDataTransposed(*b_initializer, weight_nk)); + AddOperationInput(*gemm_op, "weight", + model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape)); + } else { // TensorProto_DataType_FLOAT16 + std::vector weight_nk; // use bytes to store the type-erased data, could be any data-type + ORT_RETURN_IF_ERROR(GetTensorDataTransposed(*b_initializer, weight_nk)); + AddOperationInput(*gemm_op, "weight", + model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape)); + } } if (input_defs.size() == 3) { @@ -155,15 +163,28 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N AddOperationInput(*gemm_op, "bias", bias_arg.Name()); } else { Initializer unpacked_tensor(bias); - auto bias_data = unpacked_tensor.DataAsSpan(); std::string_view bias_data_name; - if (bias_data.size() == 1) { - // expand scalar to N - std::vector expanded_bias_data(N, bias_data[0]); - bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data); - } else { - // can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()}) - bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data); + + if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + auto bias_data = unpacked_tensor.DataAsSpan(); + if (bias_data.size() == 1) { + // expand scalar to N + std::vector expanded_bias_data(N, bias_data[0]); + bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data); + } else { + // can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()}) + bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data); + } + } else { // TensorProto_DataType_FLOAT16 + auto bias_data = unpacked_tensor.DataAsSpan(); + if (bias_data.size() == 1) { + // expand scalar to N + std::vector expanded_bias_data(N, bias_data[0]); + bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data); + } else { + // can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()}) + bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data); + } } AddOperationInput(*gemm_op, "bias", bias_data_name); @@ -202,7 +223,7 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), *b_initializer)); } else { std::vector b_transposed; - ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(*b_initializer, b_transposed)); + 
ORT_RETURN_IF_ERROR(GetTensorDataTransposed(*b_initializer, b_transposed)); CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), b_transposed); } diff --git a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc index 3403378d5911..335ca737081b 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc @@ -3,6 +3,7 @@ #include "core/providers/common.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" #include "core/providers/coreml/builders/model_builder.h" @@ -14,6 +15,7 @@ namespace coreml { class UnaryOpBuilder : public BaseOpBuilder { Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; + bool SupportsMLProgram() const override { return true; } }; Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, @@ -21,21 +23,54 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const const auto& op_type(node.OpType()); const auto& input_defs(node.InputDefs()); - std::unique_ptr layer = model_builder.CreateNNLayer(node); +#if defined(COREML_ENABLE_MLPROGRAM) + if (model_builder.CreateMLProgram()) { + using namespace CoreML::Specification::MILSpec; - if (op_type == "Sqrt") { - layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT); - } else if (op_type == "Reciprocal") { - layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::INVERSE); - } else { - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "UnaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type); - } + std::string_view coreml_op_type; + if (op_type == "Sqrt") { + coreml_op_type = "sqrt"; + } else if (op_type == "Reciprocal") { + coreml_op_type = "inverse"; + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "UnaryOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type); + } + + std::unique_ptr op = model_builder.CreateOperation(node, coreml_op_type); + AddOperationInput(*op, "x", input_defs[0]->Name()); + if (op_type == "Reciprocal") { + float epsilon = 1e-4; // epsilon: const T (Optional, default=1e-4) + auto dtype = node.InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); + if (dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", epsilon)); + } else if (dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) { + AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", MLFloat16(epsilon))); + } + } + + AddOperationOutput(*op, *node.OutputDefs()[0]); - *layer->mutable_input()->Add() = input_defs[0]->Name(); - *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); + model_builder.AddOperation(std::move(op)); + } else // NOLINT +#endif // defined (COREML_ENABLE_MLPROGRAM) + { + std::unique_ptr layer = model_builder.CreateNNLayer(node); - model_builder.AddLayer(std::move(layer)); + if (op_type == "Sqrt") { + layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT); + } else if (op_type == "Reciprocal") { + layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::INVERSE); + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + 
"UnaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type); + } + + *layer->mutable_input()->Add() = input_defs[0]->Name(); + *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); + + model_builder.AddLayer(std::move(layer)); + } return Status::OK(); } diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc index 9668bfcd09ad..50faebf06875 100644 --- a/onnxruntime/core/providers/coreml/builders/model_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc @@ -639,6 +639,14 @@ std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::st return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value)); } +template <> +std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, + gsl::span value, + std::optional> shape) { + auto input_value = CreateTensorValue(value, shape); + return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value)); +} + template <> std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type, gsl::span value, @@ -811,6 +819,9 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: multi_array->set_datatype(ArrayFeatureType::FLOAT32); break; + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: + multi_array->set_datatype(ArrayFeatureType::FLOAT16); + break; case ONNX_NAMESPACE::TensorProto_DataType_INT32: multi_array->set_datatype(ArrayFeatureType::INT32); break; diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.h b/onnxruntime/core/providers/coreml/builders/model_builder.h index bb791fb90290..b3dfec29872a 100644 --- a/onnxruntime/core/providers/coreml/builders/model_builder.h +++ b/onnxruntime/core/providers/coreml/builders/model_builder.h @@ -107,11 +107,12 @@ class ModelBuilder { std::string_view AddConstant(std::string_view op_type, std::string_view value_type, gsl::span value, std::optional> shape = std::nullopt) { static_assert(std::is_same_v || + std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v, // add specialization in AddConstantImpl for new types if needed - "AddConstant currently supports float, int64_t, std::string and bool."); + "AddConstant currently supports float, MLFloat16, int64_t, std::string and bool."); return AddConstantImpl(op_type, value_type, value, shape); } diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm index 68460ff7c9b3..97e157d73837 100644 --- a/onnxruntime/core/providers/coreml/model/model.mm +++ b/onnxruntime/core/providers/coreml/model/model.mm @@ -120,6 +120,10 @@ Status CreateInputFeatureProvider(const std::unordered_map +void StrideCopy(const T* src_buffer, T* dst_buffer, size_t block_size, + size_t num_blocks, size_t src_stride, size_t dst_stride) { + for (size_t idx = 0; idx < num_blocks; ++idx) { + std::copy_n(src_buffer, block_size, dst_buffer); + src_buffer += src_stride; + dst_buffer += dst_stride; + } +} + Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buffer, const MLMultiArray* array, const int64_t num_blocks, const int64_t block_size, const int64_t stride, @@ -196,25 +210,21 @@ Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buff case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { const auto* src_buffer = 
static_cast(mlmultiarray_buffer); auto* dst_buffer = static_cast(tensor_buffer); - const auto block_byte_size = block_size * sizeof(float); + StrideCopy(src_buffer, dst_buffer, block_size, num_blocks, stride, block_size); + + break; + } + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: { + const auto* src_buffer = static_cast(mlmultiarray_buffer); + auto* dst_buffer = static_cast(tensor_buffer); + StrideCopy(src_buffer, dst_buffer, block_size, num_blocks, stride, block_size); - for (int64_t idx = 0; idx < num_blocks; ++idx) { - memcpy(dst_buffer, src_buffer, block_byte_size); - src_buffer += stride; - dst_buffer += block_size; - } break; } case ONNX_NAMESPACE::TensorProto_DataType_INT32: { const auto* src_buffer = static_cast(mlmultiarray_buffer); auto* dst_buffer = static_cast(tensor_buffer); - const auto block_byte_size = block_size * sizeof(int32_t); - - for (int64_t idx = 0; idx < num_blocks; ++idx) { - memcpy(dst_buffer, src_buffer, block_byte_size); - src_buffer += stride; - dst_buffer += block_size; - } + StrideCopy(src_buffer, dst_buffer, block_size, num_blocks, stride, block_size); break; } diff --git a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc index bd3d21d4929f..ffb7d92a794d 100644 --- a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc +++ b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc @@ -22,40 +22,93 @@ std::vector MakeMLFloat16(const std::initializer_list& input) return output; } -#if defined(USE_CUDA) || defined(USE_ROCM) -void TestFloat16(const char* op_name, const std::vector& lhs_dim, - const std::initializer_list& lhs_values, const std::vector& rhs_dim, - const std::initializer_list& rhs_values, const std::vector& out_dim, - const std::initializer_list& out_values) { +void TestBinaryFloat16(const char* op_name, + const std::vector& lhs_dim, + const std::initializer_list& lhs_values, + const std::vector& rhs_dim, + const std::initializer_list& rhs_values, + const std::vector& out_dim, + const std::initializer_list& out_values, + bool enable_bf16 = true) { + { + std::vector> execution_providers; +#ifdef COREML_ENABLE_MLPROGRAM + execution_providers.push_back(DefaultCoreMLExecutionProvider(true)); +#elif USE_CUDA + execution_providers.push_back(DefaultCudaExecutionProvider()); +#elif USE_ROCM + execution_providers.push_back(DefaultRocmExecutionProvider()); +#endif + if (execution_providers.size() > 0) { + OpTester tester(op_name, 14); + tester.AddInput("A", lhs_dim, MakeMLFloat16(lhs_values)); + tester.AddInput("B", rhs_dim, MakeMLFloat16(rhs_values)); + tester.AddOutput("C", out_dim, MakeMLFloat16(out_values)); + + tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } + } { - OpTester tester(op_name, 14); - tester.AddInput("A", lhs_dim, MakeMLFloat16(lhs_values)); - tester.AddInput("B", rhs_dim, MakeMLFloat16(rhs_values)); - tester.AddOutput("C", out_dim, MakeMLFloat16(out_values)); std::vector> execution_providers; #ifdef USE_CUDA execution_providers.push_back(DefaultCudaExecutionProvider()); #elif USE_ROCM execution_providers.push_back(DefaultRocmExecutionProvider()); #endif - tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + + if (enable_bf16 && execution_providers.size() > 0) { + OpTester tester(op_name, 14); + tester.AddInput("A", lhs_dim, MakeBFloat16(lhs_values)); + tester.AddInput("B", rhs_dim, MakeBFloat16(rhs_values)); + tester.AddOutput("C", out_dim, 
MakeBFloat16(out_values)); + + tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } + } +} + +void TestUnaryFloat16(const char* op_name, + const std::vector& lhs_dim, + const std::initializer_list& lhs_values, + const std::vector& out_dim, + const std::initializer_list& out_values, + int opset = 14, + bool run_bf16 = true) { + { + std::vector> execution_providers; +#ifdef COREML_ENABLE_MLPROGRAM + execution_providers.push_back(DefaultCoreMLExecutionProvider(true)); +#elif USE_CUDA + execution_providers.push_back(DefaultCudaExecutionProvider()); +#elif USE_ROCM + execution_providers.push_back(DefaultRocmExecutionProvider()); +#endif + if (execution_providers.size() > 0) { + OpTester tester(op_name, opset); + tester.AddInput("A", lhs_dim, MakeMLFloat16(lhs_values)); + tester.AddOutput("C", out_dim, MakeMLFloat16(out_values)); + + tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } } { - OpTester tester(op_name, 14); - tester.AddInput("A", lhs_dim, MakeBFloat16(lhs_values)); - tester.AddInput("B", rhs_dim, MakeBFloat16(rhs_values)); - tester.AddOutput("C", out_dim, MakeBFloat16(out_values)); std::vector> execution_providers; #ifdef USE_CUDA execution_providers.push_back(DefaultCudaExecutionProvider()); #elif USE_ROCM execution_providers.push_back(DefaultRocmExecutionProvider()); #endif - tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + + if (run_bf16 && execution_providers.size() > 0) { + OpTester tester(op_name, opset); + tester.AddInput("A", lhs_dim, MakeBFloat16(lhs_values)); + tester.AddOutput("C", out_dim, MakeBFloat16(out_values)); + + tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + } } } -#endif void TestBFloat16(const char* op_name, const std::vector& lhs_dim, const std::initializer_list& lhs_values, const std::vector& rhs_dim, @@ -163,9 +216,7 @@ TEST(MathOpTest, Add_float) { test.Run(); #endif -#if defined(USE_CUDA) || defined(USE_ROCM) - TestFloat16("Add", dims, lhs_values, dims, rhs_values, dims, out_values); -#endif + TestBinaryFloat16("Add", dims, lhs_values, dims, rhs_values, dims, out_values); #if defined(USE_DNNL) TestBFloat16("Add", dims, lhs_values, dims, rhs_values, dims, out_values); @@ -202,9 +253,7 @@ TEST(MathOpTest, Add_Broadcast_Axis) { test.AddOutput("C", dims, out_values); test.Run(OpTester::ExpectResult::kExpectSuccess, ""); -#if defined(USE_CUDA) || defined(USE_ROCM) - TestFloat16("Add", dims, lhs_values, {3, 1}, rhs_values, dims, out_values); -#endif + TestBinaryFloat16("Add", dims, lhs_values, {3, 1}, rhs_values, dims, out_values); #if defined(USE_DNNL) TestBFloat16("Add", dims, lhs_values, {3, 1}, rhs_values, dims, out_values); @@ -228,9 +277,7 @@ TEST(MathOpTest, Add_Broadcast_MultidirectionalAB) { {kTensorrtExecutionProvider}); // TensorRT: got C with shape [3, 1] #endif -#if defined(USE_CUDA) || defined(USE_ROCM) - TestFloat16("Add", {3, 1}, lhs_values, {3}, rhs_values, {3, 3}, out_values); -#endif + TestBinaryFloat16("Add", {3, 1}, lhs_values, {3}, rhs_values, {3, 3}, out_values); #if defined(USE_DNNL) TestBFloat16("Add", {3, 1}, lhs_values, {3}, rhs_values, {3, 3}, out_values); @@ -254,9 +301,7 @@ TEST(MathOpTest, Add_Broadcast_MultidirectionalBA) { {kTensorrtExecutionProvider}); // TensorRT: got C with shape [3, 1] #endif -#if defined(USE_CUDA) || defined(USE_ROCM) - TestFloat16("Add", {3}, lhs_values, {3, 1}, rhs_values, {3, 3}, out_values); -#endif + TestBinaryFloat16("Add", {3}, 
lhs_values, {3, 1}, rhs_values, {3, 3}, out_values); #if defined(USE_DNNL) TestBFloat16("Add", {3}, lhs_values, {3, 1}, rhs_values, {3, 3}, out_values); @@ -527,9 +572,7 @@ TEST(MathOpTest, Sub) { test.AddOutput("C", dims, out_values); test.Run(); -#if defined(USE_CUDA) || defined(USE_ROCM) - TestFloat16("Sub", dims, lhs_values, dims, rhs_values, dims, out_values); -#endif + TestBinaryFloat16("Sub", dims, lhs_values, dims, rhs_values, dims, out_values); #if defined(USE_DNNL) TestBFloat16("Sub", dims, lhs_values, dims, rhs_values, dims, out_values); @@ -584,9 +627,7 @@ TEST(MathOpTest, Mul) { test.Run(); -#if defined(USE_CUDA) || defined(USE_ROCM) - TestFloat16("Mul", dims, lhs_values, dims, rhs_values, dims, out_values); -#endif + TestBinaryFloat16("Mul", dims, lhs_values, dims, rhs_values, dims, out_values); #if defined(USE_DNNL) TestBFloat16("Mul", dims, lhs_values, dims, rhs_values, dims, out_values); @@ -622,9 +663,7 @@ TEST(MathOpTest, Div) { test.AddOutput("C", dims, out_values); test.Run(); -#if defined(USE_CUDA) || defined(USE_ROCM) - TestFloat16("Div", dims, lhs_values, dims, rhs_values, dims, out_values); -#endif + TestBinaryFloat16("Div", dims, lhs_values, dims, rhs_values, dims, out_values); #if defined(USE_DNNL) TestBFloat16("Div", dims, lhs_values, dims, rhs_values, dims, out_values); @@ -772,13 +811,12 @@ TEST(MathOpTest, Ceil_double) { TEST(MathOpTest, Reciprocal) { OpTester test("Reciprocal"); std::vector dims{2, 2}; - test.AddInput("X", dims, - {1.0f, 2.0f, - -1.0f, -2.0f}); - test.AddOutput("Y", dims, - {1.0f, 0.5f, - -1.0f, -0.5f}); + std::initializer_list inputs = {1.0f, 2.0f, -1.0f, -2.0f}; + std::initializer_list outputs = {1.0f, 0.5f, -1.0f, -0.5f}; + test.AddInput("X", dims, inputs); + test.AddOutput("Y", dims, outputs); test.Run(); + TestUnaryFloat16("Reciprocal", dims, inputs, dims, outputs, 12, false); } TEST(MathOpTest, Reciprocal_double) { @@ -795,14 +833,13 @@ TEST(MathOpTest, Reciprocal_double) { TEST(MathOpTest, Sqrt_Float) { OpTester test("Sqrt"); + std::initializer_list inputs = {1.0f, 4.0f, 0.0f, 9.0f}; + std::initializer_list outputs = {1.0f, 2.0f, 0.0f, 3.0f}; std::vector dims{2, 2}; - test.AddInput("X", dims, - {1.0f, 4.0f, - 0.0f, 9.0f}); - test.AddOutput("Y", dims, - {1.0f, 2.0f, - 0.0f, 3.0f}); + test.AddInput("X", dims, inputs); + test.AddOutput("Y", dims, outputs); test.Run(); + TestUnaryFloat16("Sqrt", dims, inputs, dims, outputs); } #if defined(USE_DNNL) || defined(USE_CUDA) @@ -1056,24 +1093,13 @@ TEST(MathOpTest, Pow_double_int64) { test.Run(); } -#if defined(USE_CUDA) || defined(USE_ROCM) TEST(MathOpTest, Pow_float16_float16) { - OpTester test("Pow", 12); std::vector dims{4}; - - test.AddInput("X", dims, MakeMLFloat16({2.0f, 2.0f, std::sqrt(2.0f), 1.0f})); - test.AddInput("Y", dims, MakeMLFloat16({0.0f, 8.0f, 2.0f, 9.0f})); - test.AddOutput("Z", dims, MakeMLFloat16({1.0f, 256.0f, 2.0f, 1.0f})); - - std::vector> execution_providers; -#ifdef USE_CUDA - execution_providers.push_back(DefaultCudaExecutionProvider()); -#elif USE_ROCM - execution_providers.push_back(DefaultRocmExecutionProvider()); -#endif - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); + TestBinaryFloat16("Pow", dims, {2.0f, 2.0f, std::sqrt(2.0f), 1.0f}, dims, {0.0f, 8.0f, 2.0f, 9.0f}, + dims, {1.0f, 256.0f, 2.0f, 1.0f}, false); } +#if defined(USE_CUDA) || defined(USE_ROCM) TEST(MathOpTest, Pow_float_float16) { OpTester test("Pow", 12); std::vector dims{4}; @@ -3660,5 +3686,6 @@ TEST(MathOpTest, BitwiseNot_uint8) { test.AddOutput("Y", 
dims, {254, 251, 250, 252}); test.Run(); } + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/math/gemm_test.cc b/onnxruntime/test/providers/cpu/math/gemm_test.cc index 7ec84d87b2a8..c21e353ca2fb 100644 --- a/onnxruntime/test/providers/cpu/math/gemm_test.cc +++ b/onnxruntime/test/providers/cpu/math/gemm_test.cc @@ -25,7 +25,7 @@ const constexpr auto run_with_tunable_op = &run_options; } // namespace -// Only CUDA and ROCM kernel has float 16 support +// Only CUDA, ROCM and CoreML kernels have float 16 support TEST(GemmOpTest, GemmNoTrans_f16) { #ifdef USE_CUDA int min_cuda_architecture = 530; @@ -34,36 +34,132 @@ TEST(GemmOpTest, GemmNoTrans_f16) { return; } #endif - OpTester test("Gemm", 13); - test.AddAttribute("transA", (int64_t)0); - test.AddAttribute("transB", (int64_t)0); - test.AddAttribute("alpha", 1.0f); - test.AddAttribute("beta", 1.0f); + std::vector A{1.0f, 2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, -3.0f, -4.0f}; + std::vector B = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f, 1.0f, -2.1f, 1.3f, 4.1f, 1.3f, -8.1f}; + std::vector C = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f}; + + std::vector f_A(8); + std::vector f_B(12); + ConvertFloatToMLFloat16(A.data(), f_A.data(), 8); + ConvertFloatToMLFloat16(B.data(), f_B.data(), 12); + + { + // bias has same shape as output + std::vector f_Y(6); + std::vector Y{19.8f, 0.7f, -25.7f, -19.6f, 0.2f, 27.1f}; + ConvertFloatToMLFloat16(Y.data(), f_Y.data(), 6); + + std::vector f_C(6); + ConvertFloatToMLFloat16(C.data(), f_C.data(), 6); + + OpTester test("Gemm", 13); + + test.AddAttribute("transA", (int64_t)0); + test.AddAttribute("transB", (int64_t)0); + test.AddAttribute("alpha", 1.0f); + test.AddAttribute("beta", 1.0f); + test.AddInput("A", {2, 4}, f_A); + test.AddInput("B", {4, 3}, f_B); + test.AddInput("C", {2, 3}, f_C); + test.AddOutput("Y", {2, 3}, f_Y); + test.SetOutputTolerance(0.005f); + test.ConfigExcludeEps({kTensorrtExecutionProvider}) // TensorRT: fp16 is not supported + .Config(run_with_tunable_op) + .RunWithConfig(); + } + { + // bias has shape {1, output_features} + std::vector f_Y(6); + std::vector Y{19.8f, 0.7f, -25.7f, -18.8f, 3.5f, 28.1f}; + ConvertFloatToMLFloat16(Y.data(), f_Y.data(), 6); + + std::vector f_C(3); + ConvertFloatToMLFloat16(C.data(), f_C.data(), 3); + // CoreML program require B/C are constant + OpTester test("Gemm", 13); + + test.AddAttribute("transA", (int64_t)0); + test.AddAttribute("transB", (int64_t)0); + test.AddAttribute("alpha", 1.0f); + test.AddAttribute("beta", 1.0f); + test.AddInput("A", {2, 4}, f_A); + test.AddInput("B", {4, 3}, f_B, true); + test.AddInput("C", {3}, f_C, true); + test.AddOutput("Y", {2, 3}, f_Y); + test.SetOutputTolerance(0.005f); + test.ConfigExcludeEps({kTensorrtExecutionProvider}) // TensorRT: fp16 is not supported + .Config(run_with_tunable_op) + .RunWithConfig(); + } + { + // bias is a scalar + std::vector f_Y(6); + std::vector Y{19.8f, -0.9f, -26.4f, -18.8f, 1.9f, 27.4f}; + ConvertFloatToMLFloat16(Y.data(), f_Y.data(), 6); + + std::vector f_C(1); + ConvertFloatToMLFloat16(C.data(), f_C.data(), 1); + OpTester test("Gemm", 13); + + test.AddAttribute("transA", (int64_t)0); + test.AddAttribute("transB", (int64_t)0); + test.AddAttribute("alpha", 1.0f); + test.AddAttribute("beta", 1.0f); + test.AddInput("A", {2, 4}, f_A); + test.AddInput("B", {4, 3}, f_B, true); + test.AddInput("C", {1}, f_C, true); + test.AddOutput("Y", {2, 3}, f_Y); + test.SetOutputTolerance(0.005f); + test.ConfigExcludeEps({kTensorrtExecutionProvider}) // TensorRT: fp16 is not supported + 
.Config(run_with_tunable_op) + .RunWithConfig(); + } +} + +// Only CUDA, ROCM and CoreML kernels have float 16 support +TEST(GemmOpTest, GemmTransB_f16) { +#ifdef USE_CUDA + int min_cuda_architecture = 530; + if (!HasCudaEnvironment(min_cuda_architecture)) { + LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16"; + return; + } +#endif std::vector A{1.0f, 2.0f, 3.0f, 4.0f, -1.0f, -2.0f, -3.0f, -4.0f}; - std::vector B(12, 1.0f); - std::vector C(6, 1.0f); - std::vector Y{11.0f, 11.0f, 11.0f, - -9.0f, -9.0f, -9.0f}; + std::vector B = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f, 1.0f, -2.1f, 1.3f, 4.1f, 1.3f, -8.1f}; + std::vector C = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f}; std::vector f_A(8); std::vector f_B(12); - std::vector f_C(6); - std::vector f_Y(6); ConvertFloatToMLFloat16(A.data(), f_A.data(), 8); ConvertFloatToMLFloat16(B.data(), f_B.data(), 12); - ConvertFloatToMLFloat16(C.data(), f_C.data(), 6); - ConvertFloatToMLFloat16(Y.data(), f_Y.data(), 6); - - test.AddInput("A", {2, 4}, f_A); - test.AddInput("B", {4, 3}, f_B); - test.AddInput("C", {2, 3}, f_C); - test.AddOutput("Y", {2, 3}, f_Y); - test.ConfigExcludeEps({kTensorrtExecutionProvider}) // TensorRT: fp16 is not supported - .Config(run_with_tunable_op) - .RunWithConfig(); + { + // bias is a scalar and transB is True + std::vector f_Y(6); + std::vector Y{7.6f, -5.7f, -18.5f, -6.6f, 6.7f, 19.5f}; + ConvertFloatToMLFloat16(Y.data(), f_Y.data(), 6); + + std::vector f_C(1); + ConvertFloatToMLFloat16(C.data(), f_C.data(), 1); + OpTester test("Gemm", 13); + + test.AddAttribute("transA", (int64_t)0); + test.AddAttribute("transB", (int64_t)1); + test.AddAttribute("alpha", 1.0f); + test.AddAttribute("beta", 1.0f); + test.AddInput("A", {2, 4}, f_A); + test.AddInput("B", {3, 4}, f_B, true); + test.AddInput("C", {1}, f_C, true); + test.AddOutput("Y", {2, 3}, f_Y); + test.SetOutputTolerance(0.005f); + test.ConfigExcludeEps({kTensorrtExecutionProvider}) // TensorRT: fp16 is not supported + .Config(run_with_tunable_op) + .RunWithConfig(); + } } #if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DNNL) diff --git a/onnxruntime/test/providers/cpu/math/matmul_test.cc b/onnxruntime/test/providers/cpu/math/matmul_test.cc index 90370560859a..a7d2281ac19f 100644 --- a/onnxruntime/test/providers/cpu/math/matmul_test.cc +++ b/onnxruntime/test/providers/cpu/math/matmul_test.cc @@ -246,7 +246,7 @@ TEST(MathOpTest, MatMulZeroKInt32Type) { RunMatMulZeroKTest(); } -#if defined(USE_CUDA) || defined(USE_ROCM) +#if defined(USE_CUDA) || defined(USE_ROCM) || defined(COREML_ENABLE_MLPROGRAM) TEST(MathOpTest, MatMul_Float16) { #ifdef USE_CUDA int min_cuda_architecture = 530; diff --git a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc index 95b274966fbb..285f9ad05fef 100644 --- a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc +++ b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc @@ -3,7 +3,7 @@ #include "core/mlas/inc/mlas.h" -#ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED +#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) || defined(COREML_ENABLE_MLPROGRAM) || defined(USE_QNN) #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" @@ -28,6 +28,17 @@ struct ConvOpAndTestAttributes { vector activation_parameters = {}; }; +/* +Please notice that, we have predefined macros in the head of the file +#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) || defined(COREML_ENABLE_MLPROGRAM)|| defined(USE_QNN). +When we have these two macro defines, this UT will turn into green light and work. 
+ +`NhwcFusedConv` with FP16 inputs is a contrib op and is not well supported by the basic CPU EP. +If no EP satisfies all the conditions and claims the op, the test will fail because no +appropriate EP can handle the node. +As is done for CoreML below, if the attributes include a fused activation, the CoreML EP is +excluded so that the test can pass. +*/ void TestConvFp16Op(const ConvOpAndTestAttributes& attributes, const vector>& inputs, const vector>& input_shapes, @@ -82,10 +93,14 @@ void TestConvFp16Op(const ConvOpAndTestAttributes& attributes, // Disable TensorRT because weight as input is not supported excluded_providers.insert(kTensorrtExecutionProvider); // QNN have issue with dynamic weight, auto pad with SAME_UPPER, SAME_LOWER - if (!weight_is_initializer || attributes.auto_pad == "SAME_UPPER" || attributes.auto_pad == "SAME_LOWER") { + if (!weight_is_initializer || attributes.auto_pad == "SAME_UPPER" || + attributes.auto_pad == "SAME_LOWER" || + !attributes.activation.empty()) { excluded_providers.insert(kQnnExecutionProvider); } - + if (!weight_is_initializer || !attributes.activation.empty()) { + excluded_providers.insert(kCoreMLExecutionProvider); + } tester->Run(expect_result, err_str, excluded_providers); } @@ -1147,6 +1162,7 @@ TEST(ConvFp16Test, Pointwise_Relu) { MLFloat16(17.5f), MLFloat16(9.5f)}; TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } TEST(ConvFp16Test, Conv2D_HardSigmoid) { @@ -1176,6 +1192,7 @@ TEST(ConvFp16Test, Conv2D_HardSigmoid) { MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f)}; TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } TEST(ConvFp16Test, Conv2D_Bias_Z_Relu) { @@ -1205,6 +1222,7 @@ TEST(ConvFp16Test, Conv2D_Bias_Z_Relu) { vector Z_shape = {1, 2, 2, 2}; auto expected_vals = {MLFloat16(12.0f), MLFloat16(11.0f), MLFloat16(17.0f), MLFloat16(15.0f), MLFloat16(25.0f), MLFloat16(23.0f), MLFloat16(29.0f), MLFloat16(28.0f)}; TestConvFp16Op(attrs, {X, W, B, Z}, {X_shape, W_shape, B_shape, Z_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W, B, Z}, {X_shape, W_shape, B_shape, Z_shape}, expected_vals, Y_shape, true); } #endif // CONTRIB_OPS diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc index 6bc0f8d10549..606b8d580fa3 100644 --- a/onnxruntime/test/util/test_utils.cc +++ b/onnxruntime/test/util/test_utils.cc @@ -55,6 +55,11 @@ void VerifyOutput(const std::string& output_name, ::testing::Pointwise(::testing::FloatNear(fp32_abs_err), tensor.DataAsSpan())); break; } + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: { + EXPECT_THAT(expected_tensor.DataAsSpan<MLFloat16>(), + ::testing::Pointwise(::testing::FloatNear(fp32_abs_err), tensor.DataAsSpan<MLFloat16>())); + break; + } default: ORT_THROW("Unhandled data type. Please add 'case' statement for ", element_type); } diff --git a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md index bb4cfb2e09dc..ae0769e7fb93 100644 --- a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md +++ b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md @@ -20,6 +20,7 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution |ai.onnx:MaxPool|Only 2D Pool is supported currently.
3D and 5D support can be added if needed.| |ai.onnx:Mul|| |ai.onnx:Pow|Only supports cases when both inputs are fp32.| +|ai.onnx:Reciprocal|CoreML requires an `epsilon` input (default 1e-4) that ONNX does not provide.| |ai.onnx:Relu|| |ai.onnx:Reshape|| |ai.onnx:Resize|See [resize_op_builder.cc](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc) implementation. There are too many permutations to describe the valid combinations.| @@ -27,5 +28,6 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution |ai.onnx:Split|If provided, `splits` must be constant.| |ai.onnx:Sub|| |ai.onnx:Sigmoid|| +|ai.onnx:Sqrt|| |ai.onnx:Tanh|| |ai.onnx:Transpose||
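The `StrideCopy` helper introduced in `model.mm` replaces the per-dtype `memcpy` loops when copying a possibly strided `MLMultiArray` output back into a dense ONNX tensor buffer. Below is a minimal standalone sketch of that pattern; it is not part of the change itself, and the buffer sizes and values are illustrative only:

```cpp
// Standalone illustration of the StrideCopy pattern used in model.mm: repack a source
// buffer whose blocks are spaced src_stride elements apart into a dense destination.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

template <typename T>
void StrideCopy(const T* src_buffer, T* dst_buffer, size_t block_size,
                size_t num_blocks, size_t src_stride, size_t dst_stride) {
  for (size_t idx = 0; idx < num_blocks; ++idx) {
    std::copy_n(src_buffer, block_size, dst_buffer);  // copy one contiguous block
    src_buffer += src_stride;                         // skip any padding in the source
    dst_buffer += dst_stride;                         // advance densely in the destination
  }
}

int main() {
  // 3 blocks of 4 floats, with 2 padding elements (-1) between blocks in the source.
  std::vector<float> src = {0, 1, 2, 3, -1, -1,
                            4, 5, 6, 7, -1, -1,
                            8, 9, 10, 11, -1, -1};
  std::vector<float> dst(12, 0.0f);
  StrideCopy(src.data(), dst.data(), /*block_size*/ 4, /*num_blocks*/ 3,
             /*src_stride*/ 6, /*dst_stride*/ 4);
  assert(dst[4] == 4.0f && dst[11] == 11.0f);  // padding dropped, output is dense
  return 0;
}
```

The same template instantiates for `float`, `MLFloat16`, and `int32_t` in the actual change, which is what lets the three switch cases collapse into a single call.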
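For context, the FP16 paths added above are only taken when the CoreML EP is created in ML Program mode (`input_params.create_mlprogram`). A minimal sketch of enabling that from application code follows; it assumes the public CoreML provider factory header and the `COREML_FLAG_CREATE_MLPROGRAM` flag, which are not part of this diff, and the model path is hypothetical:

```cpp
// Sketch: create an ORT session that routes supported FP16 ops to the CoreML ML Program backend.
#include <onnxruntime_cxx_api.h>
#include "coreml_provider_factory.h"  // assumed install location; declares COREML_FLAG_CREATE_MLPROGRAM

int main() {
  Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "coreml_fp16"};
  Ort::SessionOptions so;
  // Request the ML Program (MIL) backend; FP16 inputs are only accepted by the EP in this mode.
  Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML(so, COREML_FLAG_CREATE_MLPROGRAM));
  Ort::Session session{env, "model_fp16.onnx", so};  // hypothetical FP16 model
  return 0;
}
```

The unit tests in this change exercise the same path via `DefaultCoreMLExecutionProvider(true)`, where `true` selects ML Program mode.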