microsoft · wejoncy · Sep 10, 2024 · Sep 11, 2024 · Sep 11, 2024 · Sep 11, 2024
diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <set>
 #include "core/providers/common.h"
 #include "core/providers/coreml/builders/helper.h"
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
@@ -12,6 +13,15 @@
 namespace onnxruntime {
 namespace coreml {
 
+// Ops that currently support FP16 in the CoreML EP. Once every op supports FP16 this set can be
+// removed; until then it is used to filter out the ops that do not support it yet.
+static const std::set<std::string> Float16Ops = {
+    "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal",
+    "Sigmoid", "Tanh", "Relu", "LeakyRelu", "Concat", "GridSample", "GlobalAveragePool",
+    "Clip", "DepthToSpace", "Resize", "Slice", "Conv",
+    "ConvTranspose", "GlobalMaxPool", "Gemm", "MatMul",
+    "AveragePool", "MaxPool", "Reshape", "Split", "Transpose"};
+
 namespace {
 // TODO, move this to shared_library
 bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node& node,
@@ -83,8 +93,9 @@
 }
 
 /* static */
-bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& /*input_params*/,
-                                 const logging::Logger& logger) {
+bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx,
+                                        [[maybe_unused]] const OpBuilderInputParams& input_params,
+                                        const logging::Logger& logger) {
   if (idx >= node.InputDefs().size()) {
     LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range";
     return false;
@@ -94,20 +105,33 @@
 
   int32_t input_type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
 
-  // currently only float is supported
-  if (!GetType(input, input_type, logger) || input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
-    LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
+  if (!GetType(input, input_type, logger)) {
+    LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Get Input type failed";
     return false;
   }
 
-  return true;
+  // float is supported
+  if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+    return true;
+  }
+
+// only support MLProgram for FP16
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 &&
+      Float16Ops.count(node.OpType())) {
+    return true;
+  }
+#endif
+
+  LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
+  return false;
 }
 
 bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
                                            const logging::Logger& logger) const {
   // We only check the type of input 0 by default
   // specific op builder can override this
-  return IsInputFloat(node, 0, input_params, logger);
+  return IsInputDtypeSupport(node, 0, input_params, logger);
 }
 
 bool BaseOpBuilder::HasSupportedOpSet(const Node& node, const logging::Logger& logger) const {

diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
@@ -32,9 +32,9 @@ class BaseOpBuilder : public IOpBuilder {
       : allow_empty_tensor_as_input_(allow_empty_tensor_as_input) {
   }
 
-  // currently we only support float
-  static bool IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
-                           const logging::Logger& logger);
+  // currently we support float/float16
+  static bool IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
+                                  const logging::Logger& logger);
 
  private:
   virtual bool IsOpSupportedImpl(const Node& /*node*/, const OpBuilderInputParams& /*input_params*/,

diff --git a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
@@ -73,7 +73,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
     } else if (op_type == "Sub") {
       coreml_op_type = "sub";
     } else if (op_type == "Div") {
-      // we only support fp32 currently. when we add support for integers we need to check the type and use
+      // we support fp32/fp16 currently. when we add support for integers we need to check the type and use
       // "floor_div" or "real_div" accordingly
       coreml_op_type = "real_div";
     } else if (op_type == "Pow") {
@@ -138,9 +138,9 @@ bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderIn
                                              const logging::Logger& logger) const {
   // Add/Sub/Mul/Div spec says inputs must be of the same type.
   // Pow spec says inputs can be different types.
-  // We only support float for all of these inputs.
-  if (!IsInputFloat(node, 0, input_params, logger) ||
-      ((node.OpType() == "Pow") && !IsInputFloat(node, 1, input_params, logger))) {
+  // We support float/float16 for all of these inputs.
+  if (!IsInputDtypeSupport(node, 0, input_params, logger) ||
+      ((node.OpType() == "Pow") && !IsInputDtypeSupport(node, 1, input_params, logger))) {
     return false;
   }
 

diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -96,6 +96,9 @@
     case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
       CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<float>());
       break;
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+      CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<MLFloat16>());
+      break;
     case ONNX_NAMESPACE::TensorProto_DataType_INT32:
       CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<int32_t>());
       break;
@@ -114,6 +117,11 @@
   weight.mutable_floatvalue()->Assign(data.begin(), data.end());
 }
 
+void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data) {
+  const auto* bytes = reinterpret_cast<const char*>(data.data());  // fp16 values are copied byte-wise
+  weight.mutable_float16value()->assign(bytes, bytes + data.size_bytes());
+}
+
 namespace {
 template <typename T>
 void CreateCoreMLWeightConvertingDataToFloats(CoreML::Specification::WeightParams& weight, gsl::span<const T> data) {
@@ -123,6 +131,15 @@
                  [](T v) { return narrow<float>(v); });
   *weight.mutable_floatvalue() = std::move(weight_floats);
 }
+
+template <typename T>
+void CreateCoreMLWeightConvertingDataToFloat16s(CoreML::Specification::WeightParams& weight, gsl::span<const T> data) {
+  // Narrow every element to float, convert it to MLFloat16, then store the result as an fp16 weight.
+  std::vector<MLFloat16> half_values;
+  half_values.reserve(data.size());
+  for (const T& element : data) half_values.push_back(MLFloat16(narrow<float>(element)));
+  CreateCoreMLWeight(weight, half_values);
+}
 }  // namespace
 
 void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int32_t> data) {
@@ -195,6 +212,13 @@
   tensor_value.mutable_floats()->mutable_values()->Add(data.begin(), data.end());
 }
 
+template <>
+void CopyDataToTensorValue<MLFloat16>(MILSpec::TensorValue& tensor_value, gsl::span<const MLFloat16> data) {
+  // fp16 data is stored through the tensor's bytes field, so the span is copied as raw bytes.
+  const auto* raw = reinterpret_cast<const char*>(data.data());
+  tensor_value.mutable_bytes()->mutable_values()->assign(raw, raw + data.size_bytes());
+}
+
 template <>
 void CopyDataToTensorValue<int32_t>(MILSpec::TensorValue& tensor_value, gsl::span<const int32_t> data) {
   tensor_value.mutable_ints()->mutable_values()->Add(data.begin(), data.end());
@@ -290,6 +314,14 @@
 // explicit specializations for types we handle so the implementation can be in the .cc file
 template MILSpec::Value CreateTensorValue<int64_t, int32_t>(gsl::span<const int64_t> data,
                                                             std::optional<gsl::span<const int64_t>> shape);
+template MILSpec::Value CreateTensorValue<float, float>(gsl::span<const float> data,
+                                                        std::optional<gsl::span<const int64_t>> shape);
+template MILSpec::Value CreateTensorValue<MLFloat16, MLFloat16>(gsl::span<const MLFloat16> data,
+                                                                std::optional<gsl::span<const int64_t>> shape);
+template MILSpec::Value CreateTensorValue<bool, bool>(gsl::span<const bool> data,
+                                                      std::optional<gsl::span<const int64_t>> shape);
+template MILSpec::Value CreateTensorValue<std::string, std::string>(gsl::span<const std::string> data,
+                                                                    std::optional<gsl::span<const int64_t>> shape);
 
 template MILSpec::Value CreateScalarTensorValue(const float& data);
 template MILSpec::Value CreateScalarTensorValue(const int32_t& data);

diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@@ -41,6 +41,9 @@ Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const ONN
 // Copy the float array to a coreml weight
 void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const float> data);
 
+// Copy the MLFloat16 array to a coreml weight
+void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data);
+
 // Copy the int32_t array to a coreml weight
 void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int32_t> data);
 

diff --git a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc
@@ -70,16 +70,17 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
   }
 }
 
-// This is an internal function, requires input tensor to be 2d float tensor
-// TODO, add support of other data types
-static Status GetTensorFloatDataTransposed(const ONNX_NAMESPACE::TensorProto& tensor,
-                                           std::vector<float>& transposed_data) {
+// This is an internal function, requires input tensor to be 2d float/float16 tensor
+template <typename T>
+static Status GetTensorDataTransposed(const ONNX_NAMESPACE::TensorProto& tensor,
+                                      std::vector<T>& transposed_data) {
   Initializer unpacked_tensor(tensor);
-  auto src_data = unpacked_tensor.DataAsSpan<float>();
+  const auto src_data = unpacked_tensor.DataAsSpan<T>();
   const auto& tensor_shape = tensor.dims();
   auto x_t = SafeInt<size_t>(tensor_shape[0]);
   auto y_t = SafeInt<size_t>(tensor_shape[1]);
   transposed_data.resize(x_t * y_t);
+
   for (size_t x = 0; x < x_t; x++) {
     for (size_t y = 0; y < y_t; y++) {
       transposed_data[y * x_t + x] = src_data[x * y_t + y];
@@ -121,8 +122,9 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
   // B is {K, N} in ONNX spec by default, or {N, K} in Gemm if transB is true
   const auto K = transB ? b1 : b0;
   const auto N = transB ? b0 : b1;
-
+  // The input type was already checked, so the dtype is guaranteed to be present here.
 #if defined(COREML_ENABLE_MLPROGRAM)
+  auto input_dtype = a.TypeAsProto()->tensor_type().elem_type();
   if (model_builder.CreateMLProgram()) {
     using namespace CoreML::Specification::MILSpec;
 
@@ -136,13 +138,19 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
       if (transB) {
         AddOperationInput(*gemm_op, "weight", b.Name());
       } else {
-        // transpose from {K, N} to {N, K}
-        std::vector<float> weight_nk;
         std::vector<int64_t> weight_nk_shape = {N, K};
-        ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(*b_initializer, weight_nk));
-
-        AddOperationInput(*gemm_op, "weight",
-                          model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape));
+        // transpose from {K, N} to {N, K}
+        if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+          std::vector<float> weight_nk;  // B transposed to {N, K}, kept in its original element type
+          ORT_RETURN_IF_ERROR(GetTensorDataTransposed(*b_initializer, weight_nk));
+          AddOperationInput(*gemm_op, "weight",
+                            model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape));
+        } else {                             // TensorProto_DataType_FLOAT16
+          std::vector<MLFloat16> weight_nk;  // B transposed to {N, K}, kept in its original element type
+          ORT_RETURN_IF_ERROR(GetTensorDataTransposed(*b_initializer, weight_nk));
+          AddOperationInput(*gemm_op, "weight",
+                            model_builder.AddConstant(gemm_op->type(), b.Name() + "_t", weight_nk, weight_nk_shape));
+        }
       }
 
       if (input_defs.size() == 3) {
@@ -155,15 +163,28 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
           AddOperationInput(*gemm_op, "bias", bias_arg.Name());
         } else {
           Initializer unpacked_tensor(bias);
-          auto bias_data = unpacked_tensor.DataAsSpan<float>();
           std::string_view bias_data_name;
-          if (bias_data.size() == 1) {
-            // expand scalar to N
-            std::vector<float> expanded_bias_data(N, bias_data[0]);
-            bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data);
-          } else {
-            // can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()})
-            bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data);
+
+          if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+            auto bias_data = unpacked_tensor.DataAsSpan<float>();
+            if (bias_data.size() == 1) {
+              // expand scalar to N
+              std::vector<float> expanded_bias_data(N, bias_data[0]);
+              bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data);
+            } else {
+              // can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()})
+              bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data);
+            }
+          } else {  // TensorProto_DataType_FLOAT16
+            auto bias_data = unpacked_tensor.DataAsSpan<MLFloat16>();
+            if (bias_data.size() == 1) {
+              // expand scalar to N
+              std::vector<MLFloat16> expanded_bias_data(N, bias_data[0]);
+              bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", expanded_bias_data);
+            } else {
+              // can use data as-is but need to adjust shape (inferred by AddConstant as {bias_data.size()})
+              bias_data_name = model_builder.AddConstant(gemm_op->type(), "bias", bias_data);
+            }
           }
 
           AddOperationInput(*gemm_op, "bias", bias_data_name);
@@ -202,7 +223,7 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
       ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), *b_initializer));
     } else {
       std::vector<float> b_transposed;
-      ORT_RETURN_IF_ERROR(GetTensorFloatDataTransposed(*b_initializer, b_transposed));
+      ORT_RETURN_IF_ERROR(GetTensorDataTransposed(*b_initializer, b_transposed));
       CreateCoreMLWeight(*coreml_inner_product->mutable_weights(), b_transposed);
     }
 

diff --git a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
@@ -3,6 +3,7 @@
 
 #include "core/providers/common.h"
 
+#include "core/providers/coreml/builders/impl/builder_utils.h"
 #include "core/providers/coreml/builders/helper.h"
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
 #include "core/providers/coreml/builders/model_builder.h"
@@ -14,28 +15,62 @@
 class UnaryOpBuilder : public BaseOpBuilder {
   Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                const logging::Logger& logger) const override;
+  bool SupportsMLProgram() const override { return true; }
 };
 
 Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                              const logging::Logger& /* logger */) const {
   const auto& op_type(node.OpType());
   const auto& input_defs(node.InputDefs());
 
-  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;
 
-  if (op_type == "Sqrt") {
-    layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT);
-  } else if (op_type == "Reciprocal") {
-    layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::INVERSE);
-  } else {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                           "UnaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
-  }
+    std::string_view coreml_op_type;
+    if (op_type == "Sqrt") {
+      coreml_op_type = "sqrt";
+    } else if (op_type == "Reciprocal") {
+      coreml_op_type = "inverse";
+    } else {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "UnaryOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
+    }
+
+    std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
+    AddOperationInput(*op, "x", input_defs[0]->Name());
+    if (op_type == "Reciprocal") {
+      float epsilon = 1e-4;  // epsilon: const T (Optional, default=1e-4)
+      auto dtype = node.InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+      if (dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+        AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", epsilon));
+      } else if (dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) {
+        AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", MLFloat16(epsilon)));
+      }
+    }
+
+    AddOperationOutput(*op, *node.OutputDefs()[0]);
 
-  *layer->mutable_input()->Add() = input_defs[0]->Name();
-  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+    model_builder.AddOperation(std::move(op));
+  } else  // NOLINT
+#endif    // defined (COREML_ENABLE_MLPROGRAM)
+  {
+    std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
 
-  model_builder.AddLayer(std::move(layer));
+    if (op_type == "Sqrt") {
+      layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT);
+    } else if (op_type == "Reciprocal") {
+      layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::INVERSE);
+    } else {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "UnaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
+    }
+
+    *layer->mutable_input()->Add() = input_defs[0]->Name();
+    *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+
+    model_builder.AddLayer(std::move(layer));
+  }
   return Status::OK();
 }