Commit 5fe1b07 — Add DynamicQuantizeLinear op (#2489)

gyulaz-htec authored Dec 12, 2023
1 parent 7e61114 commit 5fe1b07
Showing 7 changed files with 352 additions and 7 deletions.
151 changes: 151 additions & 0 deletions src/onnx/parse_dynamicquantizelinear.cpp
@@ -0,0 +1,151 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/common.hpp>
#include <migraphx/onnx/broadcast_qdq.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

/*
*********************************************************************************
* Reference: see DynamicQuantizeLinear in *
* https://github.com/onnx/onnx/blob/main/docs/Operators.md *
*********************************************************************************
DynamicQuantizeLinear
A Function to fuse calculation for Scale, Zero Point and FP32->8Bit conversion of FP32 Input data.
Outputs Scale, ZeroPoint and Quantized Input for a given FP32 Input. Scale is calculated as:
y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin)
* where qmax and qmin are max and min values for quantization range i.e. [0, 255] in case of uint8
* data range is adjusted to include 0.
Zero point is calculated as:
intermediate_zero_point = qmin - min(x)/y_scale
y_zero_point = cast(round(saturate(intermediate_zero_point)))
* where qmax and qmin are max and min values for the quantization range, i.e. [0, 255] in case of uint8
* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now
only uint8 is supported.
* rounding to nearest ties to even. Data quantization formula is:
y = saturate (round (x / y_scale) + y_zero_point)
* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now only
uint8 is supported.
* rounding to nearest ties to even.
Version
This version of the operator has been available since version 11 of the default ONNX operator set.
Inputs
x : T1
Input tensor
Outputs
y : T2
Quantized output tensor
y_scale : tensor(float)
Output scale. It's a scalar, which means a per-tensor/layer quantization.
y_zero_point : T2
Output zero point. It's a scalar, which means a per-tensor/layer quantization.
Type Constraints
T1 : tensor(float)
Constrain 'x' to float tensor.
T2 : tensor(uint8)
Constrain 'y_zero_point' and 'y' to 8-bit unsigned integer tensor.
*/

struct parse_dynamicquantizelinear : op_parser<parse_dynamicquantizelinear>
{
std::vector<op_desc> operators() const { return {{"DynamicQuantizeLinear"}}; }

std::vector<instruction_ref> parse(const op_desc& /*opd*/,
const onnx_parser& /*parser*/,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
{
auto x = args[0];
auto x_shape = x->get_shape();
auto x_type = x_shape.type();
if(x_shape.dynamic())
MIGRAPHX_THROW("DYNAMICQUANTIZELINEAR: dynamic shapes are not supported");

auto x_reshaped =
(x_shape.lens().size() == 1)
? x
: info.add_instruction(
migraphx::make_op("reshape", {{"dims", {x_shape.elements()}}}), x);

auto lit_0 = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0}});
x_reshaped =
info.add_instruction(migraphx::make_op("concat", {{"axis", 0}}), x_reshaped, lit_0);

// 1. Computing y_scale
// Note: currently, DynamicQuantizeLinear only has uint8 quantization:
const auto Q_MAX = std::numeric_limits<uint8_t>::max();
const auto Q_MIN = std::numeric_limits<uint8_t>::min();

auto q_range =
info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MAX - Q_MIN}});

// maximum(0, max(x))
auto max_x =
info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {0}}}), x_reshaped);
// minimum(0, min(x))
auto min_x =
info.add_instruction(migraphx::make_op("reduce_min", {{"axes", {0}}}), x_reshaped);

// y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin)
auto sub0 = info.add_common_op("sub", max_x, min_x);
auto y_scale = info.add_common_op("div", sub0, q_range);

// 2. Computing y_zero_point
// intermediate_zero_point = qmin - min(x) / y_scale
auto q_min = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MIN}});
auto q_max = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MAX}});
auto sub1 = info.add_common_op("sub", q_min, min_x);
auto interm_zp = info.add_common_op("div", sub1, y_scale);
// y_zero_point = cast(round(saturate(intermediate_zero_point)))
auto saturate = info.add_instruction(migraphx::make_op("clip"), interm_zp, q_min, q_max);
auto round = info.add_instruction(migraphx::make_op("nearbyint"), saturate);
auto y_zero_point = info.add_instruction(
migraphx::make_op("convert", {{"target_type", migraphx::shape::uint8_type}}), round);

// 3. quantize x with y_scale and y_zero_point
auto quant = bcast_qdq_instr("quantizelinear", x, y_scale, y_zero_point, info);

return {quant, y_scale, y_zero_point};
}
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
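
For reference, the instruction graph built by this parser computes the following, shown here as a NumPy sketch (an illustration of the reference math only, not of any MIGraphX API; np.rint rounds half to even, matching nearbyint):

import numpy as np

def dynamic_quantize_linear(x):
    # Appending 0 before the reductions implements maximum(0, max(x)) and
    # minimum(0, min(x)) in a single reduce, mirroring the concat trick above.
    flat = np.concatenate([x.reshape(-1), [0.0]]).astype(np.float32)
    q_min, q_max = 0.0, 255.0  # uint8 quantization range
    y_scale = (flat.max() - flat.min()) / (q_max - q_min)
    # intermediate_zero_point = qmin - min(x) / y_scale, then saturate, round, cast
    interm_zp = (q_min - flat.min()) / y_scale
    y_zero_point = np.rint(np.clip(interm_zp, q_min, q_max)).astype(np.uint8)
    # y = saturate(round(x / y_scale) + y_zero_point)
    y = np.clip(np.rint(x / y_scale) + y_zero_point, q_min, q_max).astype(np.uint8)
    return y, np.float32(y_scale), y_zero_point

y, scale, zp = dynamic_quantize_linear(np.array([0, 2, -3, -2.5, 1.34, 0.5], dtype=np.float32))
print(scale, zp)  # ~0.0196078438 and 153 for this input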
19 changes: 19 additions & 0 deletions test/onnx/dynamicquantizelinear_1d_test.onnx
@@ -0,0 +1,19 @@
(binary ONNX protobuf: graph "dynamicquantizelinear_1d_test" containing a single DynamicQuantizeLinear node with input x (float[6]) and outputs y (uint8[6]), y_scale (float[1]), y_zero_point (uint8[1]))
19 changes: 19 additions & 0 deletions test/onnx/dynamicquantizelinear_2d_test.onnx
@@ -0,0 +1,19 @@
(binary ONNX protobuf: graph "dynamicquantizelinear_2d_test" containing a single DynamicQuantizeLinear node with input x (float[3,4]) and outputs y (uint8[3,4]), y_scale (float[1]), y_zero_point (uint8[1]))
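
Both .onnx fixtures above are binary protobufs. If needed, their graphs can be inspected with the onnx Python package, for example:

import onnx

model = onnx.load("test/onnx/dynamicquantizelinear_1d_test.onnx")
print(onnx.helper.printable_graph(model.graph))
# expect a single DynamicQuantizeLinear node: x -> (y, y_scale, y_zero_point)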
34 changes: 34 additions & 0 deletions test/onnx/gen_onnx.py
@@ -1968,6 +1968,40 @@ def dropout_test():
return ([node], [x], [y])


@onnx_test()
def dynamicquantizelinear_1d_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [6])
y = helper.make_tensor_value_info('y', TensorProto.UINT8, [6])
y_scale = helper.make_tensor_value_info('y_scale', TensorProto.FLOAT, [1])
y_zero_point = helper.make_tensor_value_info('y_zero_point',
TensorProto.UINT8, [1])

node = onnx.helper.make_node(
'DynamicQuantizeLinear',
inputs=['x'],
outputs=['y', 'y_scale', 'y_zero_point'],
)

return ([node], [x], [y, y_scale, y_zero_point])


@onnx_test()
def dynamicquantizelinear_2d_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
y = helper.make_tensor_value_info('y', TensorProto.UINT8, [3, 4])
y_scale = helper.make_tensor_value_info('y_scale', TensorProto.FLOAT, [1])
y_zero_point = helper.make_tensor_value_info('y_zero_point',
TensorProto.UINT8, [1])

node = onnx.helper.make_node(
'DynamicQuantizeLinear',
inputs=['x'],
outputs=['y', 'y_scale', 'y_zero_point'],
)

return ([node], [x], [y, y_scale, y_zero_point])


@onnx_test()
def elu_test():
x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
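Assuming onnxruntime is installed (it is not a dependency of this commit), the two dynamicquantizelinear models generated above can also be run against its implementation of the operator to cross-check the gold values used in verify_onnx.cpp below:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("dynamicquantizelinear_1d_test.onnx",
                            providers=["CPUExecutionProvider"])
x = np.array([0, 2, -3, -2.5, 1.34, 0.5], dtype=np.float32)
y, y_scale, y_zero_point = sess.run(None, {"x": x})
print(y, y_scale, y_zero_point)
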
52 changes: 48 additions & 4 deletions test/onnx/onnx_test.cpp
@@ -1865,6 +1865,50 @@ TEST_CASE(depthtospace_simple_test)
EXPECT(p == prog);
}

TEST_CASE(dynamicquantizelinear_2d_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
auto x_dims = {3, 4};
auto x_type = migraphx::shape::float_type;
auto x = mm->add_parameter("x", {x_type, x_dims});

auto l0 = mm->add_literal({0.f});
auto x_reshaped = mm->add_instruction(migraphx::make_op("reshape", {{"dims", {12}}}), x);
x_reshaped = mm->add_instruction(migraphx::make_op("concat", {{"axis", 0}}), x_reshaped, l0);

auto q_range = mm->add_literal(
migraphx::literal{migraphx::shape{x_type}, {std::numeric_limits<uint8_t>::max()}});

auto max_x = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {0}}}), x_reshaped);
auto min_x = mm->add_instruction(migraphx::make_op("reduce_min", {{"axes", {0}}}), x_reshaped);

auto sub0 = mm->add_instruction(migraphx::make_op("sub"), max_x, min_x);
auto y_scale = mm->add_instruction(migraphx::make_op("div"), sub0, q_range);

auto q_min = mm->add_literal(
migraphx::literal{migraphx::shape{x_type}, {std::numeric_limits<uint8_t>::min()}});
auto q_max = mm->add_literal(
migraphx::literal{migraphx::shape{x_type}, {std::numeric_limits<uint8_t>::max()}});
auto sub1 = mm->add_instruction(migraphx::make_op("sub"), q_min, min_x);
auto interm_zp = mm->add_instruction(migraphx::make_op("div"), sub1, y_scale);
auto saturate = mm->add_instruction(migraphx::make_op("clip"), interm_zp, q_min, q_max);
auto round = mm->add_instruction(migraphx::make_op("nearbyint"), saturate);
auto y_zero_point = mm->add_instruction(
migraphx::make_op("convert", {{"target_type", migraphx::shape::uint8_type}}), round);

auto scale_y_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", x_dims}}), y_scale);

auto y_pt_c_bcast = mm->add_instruction(
migraphx::make_op("multibroadcast", {{"out_lens", x_dims}}), y_zero_point);

mm->add_instruction(migraphx::make_op("quantizelinear"), x, scale_y_bcast, y_pt_c_bcast);

auto prog = optimize_onnx("dynamicquantizelinear_2d_test.onnx");
EXPECT(p == prog);
}

TEST_CASE(spacetodepth_test)
{
migraphx::program p;
@@ -2863,12 +2907,12 @@ migraphx::program make_group_norm(const std::vector<int64_t>& input_dims,

auto eps = mm->add_literal(migraphx::literal{dtype, {eps_value}});

-auto x_reshaped =
+auto x_reshapedd =
 mm->add_instruction(migraphx::make_op("reshape", {{"dims", reshape_dims}}), x);
 auto mean =
-mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", reduce_axes}}), x_reshaped);
-auto x_sub_mean = add_common_op(*mm, migraphx::make_op("sub"), {x_reshaped, mean});
-auto x_sqdiff_mean = add_common_op(*mm, migraphx::make_op("sqdiff"), {x_reshaped, mean});
+mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", reduce_axes}}), x_reshapedd);
+auto x_sub_mean = add_common_op(*mm, migraphx::make_op("sub"), {x_reshapedd, mean});
+auto x_sqdiff_mean = add_common_op(*mm, migraphx::make_op("sqdiff"), {x_reshapedd, mean});
auto var = mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", reduce_axes}}),
x_sqdiff_mean);
auto var_eps = add_common_op(*mm, migraphx::make_op("add"), {var, eps});
81 changes: 81 additions & 0 deletions test/onnx/verify_onnx.cpp
@@ -351,6 +351,87 @@ TEST_CASE(depthtospace_simple_test)
EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
}

TEST_CASE(dynamicquantizelinear_1d_test)
{
auto p = migraphx::parse_onnx("dynamicquantizelinear_1d_test.onnx");
p.compile(migraphx::make_target("ref"));

std::vector<float> data{0, 2, -3, -2.5, 1.34, 0.5};
migraphx::shape s_x{migraphx::shape::float_type, {6}};
migraphx::parameter_map pp;
pp["x"] = migraphx::argument(s_x, data.data());
auto results = p.eval(pp);

std::vector<uint8_t> y_results;
results.at(0).visit([&](auto output) { y_results.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_gold = {153, 255, 0, 26, 221, 179};
EXPECT(migraphx::verify::verify_rms_range(y_results, y_gold));

std::vector<float> y_scale;
results.at(1).visit([&](auto output) { y_scale.assign(output.begin(), output.end()); });
std::vector<float> y_scale_gold = {0.0196078438};
EXPECT(migraphx::verify::verify_rms_range(y_scale, y_scale_gold));

std::vector<uint8_t> y_zpt;
results.at(2).visit([&](auto output) { y_zpt.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_zpt_gold = {153};
EXPECT(migraphx::verify::verify_rms_range(y_zpt, y_zpt_gold));
}

TEST_CASE(dynamicquantizelinear_1d_max_adjusted_test)
{
auto p = migraphx::parse_onnx("dynamicquantizelinear_1d_test.onnx");
p.compile(migraphx::make_target("ref"));

std::vector<float> data{-1.0, -2.1, -1.3, -2.5, -3.34, -4.0};
migraphx::shape s_x{migraphx::shape::float_type, {6}};
migraphx::parameter_map pp;
pp["x"] = migraphx::argument(s_x, data.data());
auto results = p.eval(pp);

std::vector<uint8_t> y_results;
results.at(0).visit([&](auto output) { y_results.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_gold = {191, 121, 172, 96, 42, 0};
EXPECT(migraphx::verify::verify_rms_range(y_results, y_gold));

std::vector<float> y_scale;
results.at(1).visit([&](auto output) { y_scale.assign(output.begin(), output.end()); });
std::vector<float> y_scale_gold = {0.0156862754};
EXPECT(migraphx::verify::verify_rms_range(y_scale, y_scale_gold));

std::vector<uint8_t> y_zpt;
results.at(2).visit([&](auto output) { y_zpt.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_zpt_gold = {255};
EXPECT(migraphx::verify::verify_rms_range(y_zpt, y_zpt_gold));
}

TEST_CASE(dynamicquantizelinear_2d_test)
{
auto p = migraphx::parse_onnx("dynamicquantizelinear_2d_test.onnx");
p.compile(migraphx::make_target("ref"));

std::vector<float> data{1.0, 2.1, 1.3, 2.5, 3.34, 4.0, 1.5, 2.6, 3.9, 4.0, 3.0, 2.345};
migraphx::shape s_x{migraphx::shape::float_type, {3, 4}};
migraphx::parameter_map pp;
pp["x"] = migraphx::argument(s_x, data.data());
auto results = p.eval(pp);

std::vector<uint8_t> y_results;
results.at(0).visit([&](auto output) { y_results.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_gold = {64, 134, 83, 159, 213, 255, 96, 166, 249, 255, 191, 149};
EXPECT(migraphx::verify::verify_rms_range(y_results, y_gold));

std::vector<float> y_scale;
results.at(1).visit([&](auto output) { y_scale.assign(output.begin(), output.end()); });
std::vector<float> y_scale_gold = {0.0156862754};
EXPECT(migraphx::verify::verify_rms_range(y_scale, y_scale_gold));

std::vector<uint8_t> y_zpt;
results.at(2).visit([&](auto output) { y_zpt.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_zpt_gold = {0};
EXPECT(migraphx::verify::verify_rms_range(y_zpt, y_zpt_gold));
}
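
The gold y_scale and y_zero_point values in the three tests above follow directly from the formulas in the parser's header comment; a quick arithmetic check (a sketch, not part of the test suite):

import numpy as np

def scale_and_zp(x):
    # the data range is adjusted to include 0, per the spec
    lo, hi = min(0.0, min(x)), max(0.0, max(x))
    y_scale = np.float32((hi - lo) / 255)
    y_zero_point = int(np.rint(np.clip((0.0 - lo) / y_scale, 0, 255)))
    return y_scale, y_zero_point

print(scale_and_zp([0, 2, -3, -2.5, 1.34, 0.5]))            # ~0.0196078438, 153
print(scale_and_zp([-1.0, -2.1, -1.3, -2.5, -3.34, -4.0]))  # ~0.0156862754, 255 (max adjusted to 0)
print(scale_and_zp([1.0, 2.1, 1.3, 2.5, 3.34, 4.0,
                    1.5, 2.6, 3.9, 4.0, 3.0, 2.345]))       # ~0.0156862754, 0 (min adjusted to 0)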

TEST_CASE(spacetodepth_simple_test)
{
auto p = migraphx::parse_onnx("spacetodepth_simple_test.onnx");