[CoreML MLProgram] Support Float16 (1/N) (microsoft#22068)
### Description
Support Float16 for CoreML MLProgram EP.
Operations:
    "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal",
    "Sigmoid", "Tanh", "Relu", "LeakyRelu", "Concat", "GridSample", "GlobalAveragePool",
    "Clip", "DepthToSpace", "Resize", "Slice", "Conv",
    "ConvTranspose", "GlobalMaxPool", "Gemm", "MatMul",
    "AveragePool", "MaxPool", "Reshape", "Split", "Transpose"

### Motivation and Context

---------

Co-authored-by: Scott McKay <[email protected]>
wejoncy and skottmckay authored Sep 30, 2024
1 parent 434f0fa commit 2cfe1f0
Showing 35 changed files with 1,427 additions and 849 deletions.
@@ -104,7 +104,13 @@ Status ActivationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
if (add_alpha) {
NodeAttrHelper helper(node);
const auto alpha = helper.Get("alpha", 0.01f);
AddOperationInput(*op, "alpha", model_builder.AddScalarConstant(op->type(), "alpha", alpha));

auto input_dtype = node.InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
AddOperationInput(*op, "alpha", model_builder.AddScalarConstant(op->type(), "alpha", alpha));
} else {
AddOperationInput(*op, "alpha", model_builder.AddScalarConstant(op->type(), "alpha", MLFloat16(alpha)));
}
}

AddOperationOutput(*op, *node.OutputDefs()[0]);
38 changes: 31 additions & 7 deletions onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <set>
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
@@ -12,6 +13,15 @@ using namespace CoreML::Specification;
namespace onnxruntime {
namespace coreml {

// Once all ops support FP16 we can remove this. Until then, keep a set of ops to
// filter the supported ones.
static std::set<std::string> Float16Ops = {
"Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal",
"Sigmoid", "Tanh", "Relu", "LeakyRelu", "Concat", "GridSample", "GlobalAveragePool",
"Clip", "DepthToSpace", "Resize", "Slice", "Conv",
"ConvTranspose", "GlobalMaxPool", "Gemm", "MatMul",
"AveragePool", "MaxPool", "Reshape", "Split", "Transpose"};

namespace {
// TODO, move this to shared_library
bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node& node,
@@ -83,8 +93,9 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputPar
}

/* static */
bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& /*input_params*/,
const logging::Logger& logger) {
bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx,
[[maybe_unused]] const OpBuilderInputParams& input_params,
const logging::Logger& logger) {
if (idx >= node.InputDefs().size()) {
LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range";
return false;
@@ -94,20 +105,33 @@ bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderIn

int32_t input_type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;

// currently only float is supported
if (!GetType(input, input_type, logger) || input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
if (!GetType(input, input_type, logger)) {
LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Get Input type failed";
return false;
}

return true;
// float is supported
if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
return true;
}

// FP16 is only supported when creating an MLProgram
#if defined(COREML_ENABLE_MLPROGRAM)
if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 &&
Float16Ops.count(node.OpType())) {
return true;
}
#endif

LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
return false;
}

bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
// We only check the type of input 0 by default
// specific op builder can override this
return IsInputFloat(node, 0, input_params, logger);
return IsInputDtypeSupport(node, 0, input_params, logger);
}

bool BaseOpBuilder::HasSupportedOpSet(const Node& node, const logging::Logger& logger) const {
@@ -32,9 +32,9 @@ class BaseOpBuilder : public IOpBuilder {
: allow_empty_tensor_as_input_(allow_empty_tensor_as_input) {
}

// currently we only support float
static bool IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
const logging::Logger& logger);
// currently we support float/float16
static bool IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
const logging::Logger& logger);

private:
virtual bool IsOpSupportedImpl(const Node& /*node*/, const OpBuilderInputParams& /*input_params*/,
@@ -73,7 +73,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
} else if (op_type == "Sub") {
coreml_op_type = "sub";
} else if (op_type == "Div") {
// we only support fp32 currently. when we add support for integers we need to check the type and use
// we support fp32/fp16 currently. when we add support for integers we need to check the type and use
// "floor_div" or "real_div" accordingly
coreml_op_type = "real_div";
} else if (op_type == "Pow") {
@@ -138,9 +138,22 @@ bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderIn
const logging::Logger& logger) const {
// Add/Sub/Mul/Div spec says inputs must be of the same type.
// Pow spec says inputs can be different types.
// We only support float for all of these inputs.
if (!IsInputFloat(node, 0, input_params, logger) ||
((node.OpType() == "Pow") && !IsInputFloat(node, 1, input_params, logger))) {
// We support float/float16 for all of these inputs.

if (node.OpType() == "Pow") {
const auto& input0 = *node.InputDefs()[0];
const auto& input1 = *node.InputDefs()[1];
int32_t input_type0 = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
int32_t input_type1 = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
if (!GetType(input0, input_type0, logger)) {
return false;
}
if (!GetType(input1, input_type1, logger) || input_type1 != input_type0) {
return false;
}
}

if (!IsInputDtypeSupport(node, 0, input_params, logger)) {
return false;
}

32 changes: 32 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -96,6 +96,9 @@ Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight,
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<float>());
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<MLFloat16>());
break;
case ONNX_NAMESPACE::TensorProto_DataType_INT32:
CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<int32_t>());
break;
@@ -114,6 +117,11 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<c
weight.mutable_floatvalue()->Assign(data.begin(), data.end());
}

void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data) {
const char* data_byte_ptr = reinterpret_cast<const char*>(data.data());
weight.mutable_float16value()->assign(data_byte_ptr, data_byte_ptr + data.size_bytes());
}

namespace {
template <typename T>
void CreateCoreMLWeightConvertingDataToFloats(CoreML::Specification::WeightParams& weight, gsl::span<const T> data) {
@@ -123,6 +131,15 @@ void CreateCoreMLWeightConvertingDataToFloats(CoreML::Specification::WeightParam
[](T v) { return narrow<float>(v); });
*weight.mutable_floatvalue() = std::move(weight_floats);
}

template <typename T>
void CreateCoreMLWeightConvertingDataToFloat16s(CoreML::Specification::WeightParams& weight, gsl::span<const T> data) {
std::vector<MLFloat16> weight_float16s{};
weight_float16s.reserve(data.size());
std::transform(data.begin(), data.end(), std::back_inserter(weight_float16s),
[](T v) { return MLFloat16(float(v)); });
CreateCoreMLWeight(weight, weight_float16s);
}
} // namespace

void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int32_t> data) {
@@ -195,6 +212,13 @@ void CopyDataToTensorValue<float>(MILSpec::TensorValue& tensor_value, gsl::span<
tensor_value.mutable_floats()->mutable_values()->Add(data.begin(), data.end());
}

template <>
void CopyDataToTensorValue<MLFloat16>(MILSpec::TensorValue& tensor_value, gsl::span<const MLFloat16> data) {
const char* begin = reinterpret_cast<const char*>(data.data());
const char* end = begin + (data.size() * sizeof(MLFloat16));
tensor_value.mutable_bytes()->mutable_values()->assign(begin, end);
}

template <>
void CopyDataToTensorValue<int32_t>(MILSpec::TensorValue& tensor_value, gsl::span<const int32_t> data) {
tensor_value.mutable_ints()->mutable_values()->Add(data.begin(), data.end());
@@ -290,6 +314,14 @@ MILSpec::Value CreateScalarTensorValue(const T& data) {
// explicit specializations for types we handle so the implementation can be in the .cc file
template MILSpec::Value CreateTensorValue<int64_t, int32_t>(gsl::span<const int64_t> data,
std::optional<gsl::span<const int64_t>> shape);
template MILSpec::Value CreateTensorValue<float, float>(gsl::span<const float> data,
std::optional<gsl::span<const int64_t>> shape);
template MILSpec::Value CreateTensorValue<MLFloat16, MLFloat16>(gsl::span<const MLFloat16> data,
std::optional<gsl::span<const int64_t>> shape);
template MILSpec::Value CreateTensorValue<bool, bool>(gsl::span<const bool> data,
std::optional<gsl::span<const int64_t>> shape);
template MILSpec::Value CreateTensorValue<std::string, std::string>(gsl::span<const std::string> data,
std::optional<gsl::span<const int64_t>> shape);

template MILSpec::Value CreateScalarTensorValue(const float& data);
template MILSpec::Value CreateScalarTensorValue(const int32_t& data);
@@ -41,6 +41,9 @@ Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const ONN
// Copy the float array to a coreml weight
void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const float> data);

// Copy the MLFloat16 array to a coreml weight
void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data);

// Copy the int32_t array to a coreml weight
void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int32_t> data);

22 changes: 18 additions & 4 deletions onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc
@@ -92,16 +92,30 @@ Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
Operation& clip_op = *op;
AddOperationInput(clip_op, "x", input_name);

// we already checked this, so the dtype must be present.
auto input_dtype = node.InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
// if min and max were attributes we need to add initializers. otherwise we use the existing inputs
const bool min_max_attribs = node.SinceVersion() < 11;
std::string_view min_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "min", min)
: node.InputDefs()[1]->Name();
std::string_view min_name;
if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
min_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "min", min)
: node.InputDefs()[1]->Name();
} else {
min_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "min", MLFloat16(min))
: node.InputDefs()[1]->Name();
}

AddOperationInput(clip_op, "alpha", min_name);

if (has_max) {
std::string_view max_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "max", max)
: node.InputDefs()[2]->Name();
std::string_view max_name;
if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
max_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "max", max)
: node.InputDefs()[2]->Name();
} else {
max_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "max", MLFloat16(max))
: node.InputDefs()[2]->Name();
}
AddOperationInput(clip_op, "beta", max_name);
}
}
@@ -67,7 +67,9 @@ Status DepthToSpaceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
// we checked shape was static in IsOpSupportedImpl so this should never fail
std::vector<int64_t> input_shape;
ORT_RETURN_IF_NOT(GetStaticShape(*input_defs[0], input_shape, logger), "Failed to get input shape");
const int32_t elem_type = static_cast<int32_t>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
auto input_dtype = input_defs[0]->TypeAsProto()->tensor_type().elem_type();

const int32_t elem_type = static_cast<int32_t>(input_dtype);

// reshape to [b * c // (blocksize ** 2), blocksize, blocksize, h, w]
auto reshape1 = model_builder.CreateOperation(node, "reshape", "pre");
63 changes: 42 additions & 21 deletions onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc
@@ -70,16 +70,17 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
}
}

// This is an internal function, requires input tensor to be 2d float tensor
// TODO, add support of other data types
static Status GetTensorFloatDataTransposed(const ONNX_NAMESPACE::TensorProto& tensor,
std::vector<float>& transposed_data) {
// This is an internal function; it requires the input tensor to be a 2D float/float16 tensor
template <typename T>
static Status GetTensorDataTransposed(const ONNX_NAMESPACE::TensorProto& tensor,
std::vector<T>& transposed_data) {
Initializer unpacked_tensor(tensor);
auto src_data = unpacked_tensor.DataAsSpan<float>();
const auto src_data = unpacked_tensor.DataAsSpan<T>();
const auto& tensor_shape = tensor.dims();
auto x_t = SafeInt<size_t>(tensor_shape[0]);
auto y_t = SafeInt<size_t>(tensor_shape[1]);
transposed_data.resize(x_t * y_t);

for (size_t x = 0; x < x_t; x++) {
for (size_t y = 0; y < y_t; y++) {
transposed_data[y * x_t + x] = src_data[x * y_t + y];
@@ -121,8 +122,9 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
// B is {K, N} in ONNX spec by default, or {N, K} in Gemm if transB is true
const auto K = transB ? b1 : b0;
const auto N = transB ? b0 : b1;

// we already checked this, so the dtype must be present.
#if defined(COREML_ENABLE_MLPROGRAM)
auto input_dtype = a.TypeAsProto()->tensor_type().elem_type();
if (model_builder.CreateMLProgram()) {
using namespace CoreML::Specification::MILSpec;

@@ -136,13 +138,19 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
if (transB) {
AddOperationInput(*gemm_op, "weight", b.Name());
} else {
// transpose from {K, N} to {N, K}
std::vector<float> weight_nk;
std::vector<int64_t> weight_nk_shape = {N, K};
ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(*b_initializer, weight_nk));

AddOperationInput(*gemm_op, "weight",
model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape));
// transpose from {K, N} to {N, K}
if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
std::vector<float> weight_nk;
ORT_RETURN_IF_ERROR(GetTensorDataTransposed(*b_initializer, weight_nk));
AddOperationInput(*gemm_op, "weight",
model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape));
} else { // TensorProto_DataType_FLOAT16
std::vector<MLFloat16> weight_nk;
ORT_RETURN_IF_ERROR(GetTensorDataTransposed(*b_initializer, weight_nk));
AddOperationInput(*gemm_op, "weight",
model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape));
}
}

if (input_defs.size() == 3) {
@@ -155,15 +163,28 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
AddOperationInput(*gemm_op, "bias", bias_arg.Name());
} else {
Initializer unpacked_tensor(bias);
auto bias_data = unpacked_tensor.DataAsSpan<float>();
std::string_view bias_data_name;
if (bias_data.size() == 1) {
// expand scalar to N
std::vector<float> expanded_bias_data(N, bias_data[0]);
bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data);
} else {
// can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()})
bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data);

if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
auto bias_data = unpacked_tensor.DataAsSpan<float>();
if (bias_data.size() == 1) {
// expand scalar to N
std::vector<float> expanded_bias_data(N, bias_data[0]);
bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data);
} else {
// can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()})
bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data);
}
} else { // TensorProto_DataType_FLOAT16
auto bias_data = unpacked_tensor.DataAsSpan<MLFloat16>();
if (bias_data.size() == 1) {
// expand scalar to N
std::vector<MLFloat16> expanded_bias_data(N, bias_data[0]);
bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data);
} else {
// can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()})
bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data);
}
}

AddOperationInput(*gemm_op, "bias", bias_data_name);
@@ -202,7 +223,7 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), *b_initializer));
} else {
std::vector<float> b_transposed;
ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(*b_initializer, b_transposed));
ORT_RETURN_IF_ERROR(GetTensorDataTransposed(*b_initializer, b_transposed));
CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), b_transposed);
}
