From 5fe1b0757e49e6eecee0a975d209ae77679675f7 Mon Sep 17 00:00:00 2001 From: Zakor Gyula <126694206+gyulaz-htec@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:58:53 +0100 Subject: [PATCH] Add DynamicQuantizeLinear op (#2489) --- src/onnx/parse_dynamicquantizelinear.cpp | 151 +++++++++++++++++++ test/onnx/dynamicquantizelinear_1d_test.onnx | 19 +++ test/onnx/dynamicquantizelinear_2d_test.onnx | 19 +++ test/onnx/gen_onnx.py | 34 +++++ test/onnx/onnx_test.cpp | 52 ++++++- test/onnx/verify_onnx.cpp | 81 ++++++++++ test/py/onnx_backend_test.py | 3 - 7 files changed, 352 insertions(+), 7 deletions(-) create mode 100644 src/onnx/parse_dynamicquantizelinear.cpp create mode 100644 test/onnx/dynamicquantizelinear_1d_test.onnx create mode 100644 test/onnx/dynamicquantizelinear_2d_test.onnx diff --git a/src/onnx/parse_dynamicquantizelinear.cpp b/src/onnx/parse_dynamicquantizelinear.cpp new file mode 100644 index 00000000000..64f63d6fcec --- /dev/null +++ b/src/onnx/parse_dynamicquantizelinear.cpp @@ -0,0 +1,151 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace onnx { + +/* + ********************************************************************************* + * Reference: see DynamicQuantizeLinear in * + * https://github.com/onnx/onnx/blob/main/docs/Operators.md * + ********************************************************************************* +DynamicQuantizeLinear +A Function to fuse calculation for Scale, Zero Point and FP32->8Bit conversion of FP32 Input data. +Outputs Scale, ZeroPoint and Quantized Input for a given FP32 Input. Scale is calculated as: +y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin) +* where qmax and qmin are max and min values for quantization range i.e. [0, 255] in case of uint8 +* data range is adjusted to include 0. + +Zero point is calculated as: +intermediate_zero_point = qmin - min(x)/y_scale +y_zero_point = cast(round(saturate(intermediate_zero_point))) +* where qmax and qmin are max and min values for quantization range i.e. [0, 255] in case of uint8 +* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now +only uint8 is supported. +* rounding to nearest ties to even. Data quantization formula is: + +y = saturate (round (x / y_scale) + y_zero_point) +* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now only +uint8 is supported. +* rounding to nearest ties to even. + +Version +This version of the operator has been available since version 11 of the default ONNX operator set. 
+ +Inputs +x : T1 +Input tensor + +Outputs +y : T2 +Quantized output tensor + +y_scale : tensor(float) +Output scale. It's a scalar, which means a per-tensor/layer quantization. + +y_zero_point : T2 +Output zero point. It's a scalar, which means a per-tensor/layer quantization. + +Type Constraints +T1 : tensor(float) +Constrain 'x' to float tensor. + +T2 : tensor(uint8) +Constrain 'y_zero_point' and 'y' to 8-bit unsigned integer tensor. +*/ + +struct parse_dynamicquantizelinear : op_parser +{ + std::vector operators() const { return {{"DynamicQuantizeLinear"}}; } + + std::vector parse(const op_desc& /*opd*/, + const onnx_parser& /*parser*/, + const onnx_parser::node_info& info, + const std::vector& args) const + { + auto x = args[0]; + auto x_shape = x->get_shape(); + auto x_type = x_shape.type(); + if(x_shape.dynamic()) + MIGRAPHX_THROW("DYNAMICQUANTIZELINEAR: dynamic shapes are not supported"); + + auto x_reshaped = + (x_shape.lens().size() == 1) + ? x + : info.add_instruction( + migraphx::make_op("reshape", {{"dims", {x_shape.elements()}}}), x); + + auto lit_0 = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0}}); + x_reshaped = + info.add_instruction(migraphx::make_op("concat", {{"axis", 0}}), x_reshaped, lit_0); + + // 1. 
Computing y_scale + // Note: currently, DynamicQuantizeLinear only has uint8 quantization: + const auto Q_MAX = std::numeric_limits::max(); + const auto Q_MIN = std::numeric_limits::min(); + + auto q_range = + info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MAX - Q_MIN}}); + + // maximum(0, max(x)) + auto max_x = + info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {0}}}), x_reshaped); + // minimum(0, min(x)) + auto min_x = + info.add_instruction(migraphx::make_op("reduce_min", {{"axes", {0}}}), x_reshaped); + + // y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin) + auto sub0 = info.add_common_op("sub", max_x, min_x); + auto y_scale = info.add_common_op("div", sub0, q_range); + + // 2. Computing y_zero_point + // intermediate_zero_point = qmin - min(x) / y_scale + auto q_min = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MIN}}); + auto q_max = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MAX}}); + auto sub1 = info.add_common_op("sub", q_min, min_x); + auto interm_zp = info.add_common_op("div", sub1, y_scale); + // y_zero_point = cast(round(saturate(intermediate_zero_point))) + auto saturate = info.add_instruction(migraphx::make_op("clip"), interm_zp, q_min, q_max); + auto round = info.add_instruction(migraphx::make_op("nearbyint"), saturate); + auto y_zero_point = info.add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::uint8_type}}), round); + + // 3. 
quantize x with y_scale and y_zero_point + auto quant = bcast_qdq_instr("quantizelinear", x, y_scale, y_zero_point, info); + + return {quant, y_scale, y_zero_point}; + } +}; + +} // namespace onnx +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/test/onnx/dynamicquantizelinear_1d_test.onnx b/test/onnx/dynamicquantizelinear_1d_test.onnx new file mode 100644 index 00000000000..79982a578ff --- /dev/null +++ b/test/onnx/dynamicquantizelinear_1d_test.onnx @@ -0,0 +1,19 @@ + dynamicquantizelinear_1d_test:ª +4 +xyy_scale y_zero_point"DynamicQuantizeLineardynamicquantizelinear_1d_testZ +x + + +b +y + + +b +y_scale + + +b + y_zero_point + + +B \ No newline at end of file diff --git a/test/onnx/dynamicquantizelinear_2d_test.onnx b/test/onnx/dynamicquantizelinear_2d_test.onnx new file mode 100644 index 00000000000..7543f41afb2 --- /dev/null +++ b/test/onnx/dynamicquantizelinear_2d_test.onnx @@ -0,0 +1,19 @@ + dynamicquantizelinear_2d_test:² +4 +xyy_scale y_zero_point"DynamicQuantizeLineardynamicquantizelinear_2d_testZ +x +  + +b +y +  + +b +y_scale + + +b + y_zero_point + + +B \ No newline at end of file diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index 974bddf2ae7..423e196fab4 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -1968,6 +1968,40 @@ def dropout_test(): return ([node], [x], [y]) +@onnx_test() +def dynamicquantizelinear_1d_test(): + x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [6]) + y = helper.make_tensor_value_info('y', TensorProto.UINT8, [6]) + y_scale = helper.make_tensor_value_info('y_scale', TensorProto.FLOAT, [1]) + y_zero_point = helper.make_tensor_value_info('y_zero_point', + TensorProto.UINT8, [1]) + + node = onnx.helper.make_node( + 'DynamicQuantizeLinear', + inputs=['x'], + outputs=['y', 'y_scale', 'y_zero_point'], + ) + + return ([node], [x], [y, y_scale, y_zero_point]) + + +@onnx_test() +def dynamicquantizelinear_2d_test(): + x = helper.make_tensor_value_info('x', TensorProto.FLOAT, 
[3, 4]) + y = helper.make_tensor_value_info('y', TensorProto.UINT8, [3, 4]) + y_scale = helper.make_tensor_value_info('y_scale', TensorProto.FLOAT, [1]) + y_zero_point = helper.make_tensor_value_info('y_zero_point', + TensorProto.UINT8, [1]) + + node = onnx.helper.make_node( + 'DynamicQuantizeLinear', + inputs=['x'], + outputs=['y', 'y_scale', 'y_zero_point'], + ) + + return ([node], [x], [y, y_scale, y_zero_point]) + + @onnx_test() def elu_test(): x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3]) diff --git a/test/onnx/onnx_test.cpp b/test/onnx/onnx_test.cpp index 99760160a75..13230bbb8dd 100644 --- a/test/onnx/onnx_test.cpp +++ b/test/onnx/onnx_test.cpp @@ -1865,6 +1865,50 @@ TEST_CASE(depthtospace_simple_test) EXPECT(p == prog); } +TEST_CASE(dynamicquantizelinear_2d_test) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + auto x_dims = {3, 4}; + auto x_type = migraphx::shape::float_type; + auto x = mm->add_parameter("x", {x_type, x_dims}); + + auto l0 = mm->add_literal({0.f}); + auto x_reshaped = mm->add_instruction(migraphx::make_op("reshape", {{"dims", {12}}}), x); + x_reshaped = mm->add_instruction(migraphx::make_op("concat", {{"axis", 0}}), x_reshaped, l0); + + auto q_range = mm->add_literal( + migraphx::literal{migraphx::shape{x_type}, {std::numeric_limits::max()}}); + + auto max_x = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {0}}}), x_reshaped); + auto min_x = mm->add_instruction(migraphx::make_op("reduce_min", {{"axes", {0}}}), x_reshaped); + + auto sub0 = mm->add_instruction(migraphx::make_op("sub"), max_x, min_x); + auto y_scale = mm->add_instruction(migraphx::make_op("div"), sub0, q_range); + + auto q_min = mm->add_literal( + migraphx::literal{migraphx::shape{x_type}, {std::numeric_limits::min()}}); + auto q_max = mm->add_literal( + migraphx::literal{migraphx::shape{x_type}, {std::numeric_limits::max()}}); + auto sub1 = mm->add_instruction(migraphx::make_op("sub"), q_min, min_x); + auto interm_zp = 
mm->add_instruction(migraphx::make_op("div"), sub1, y_scale); + auto saturate = mm->add_instruction(migraphx::make_op("clip"), interm_zp, q_min, q_max); + auto round = mm->add_instruction(migraphx::make_op("nearbyint"), saturate); + auto y_zero_point = mm->add_instruction( + migraphx::make_op("convert", {{"target_type", migraphx::shape::uint8_type}}), round); + + auto scale_y_bcast = + mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", x_dims}}), y_scale); + + auto y_pt_c_bcast = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", x_dims}}), y_zero_point); + + mm->add_instruction(migraphx::make_op("quantizelinear"), x, scale_y_bcast, y_pt_c_bcast); + + auto prog = optimize_onnx("dynamicquantizelinear_2d_test.onnx"); + EXPECT(p == prog); +} + TEST_CASE(spacetodepth_test) { migraphx::program p; @@ -2863,12 +2907,12 @@ migraphx::program make_group_norm(const std::vector& input_dims, auto eps = mm->add_literal(migraphx::literal{dtype, {eps_value}}); - auto x_reshaped = + auto x_reshapedd = mm->add_instruction(migraphx::make_op("reshape", {{"dims", reshape_dims}}), x); auto mean = - mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", reduce_axes}}), x_reshaped); - auto x_sub_mean = add_common_op(*mm, migraphx::make_op("sub"), {x_reshaped, mean}); - auto x_sqdiff_mean = add_common_op(*mm, migraphx::make_op("sqdiff"), {x_reshaped, mean}); + mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", reduce_axes}}), x_reshapedd); + auto x_sub_mean = add_common_op(*mm, migraphx::make_op("sub"), {x_reshapedd, mean}); + auto x_sqdiff_mean = add_common_op(*mm, migraphx::make_op("sqdiff"), {x_reshapedd, mean}); auto var = mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", reduce_axes}}), x_sqdiff_mean); auto var_eps = add_common_op(*mm, migraphx::make_op("add"), {var, eps}); diff --git a/test/onnx/verify_onnx.cpp b/test/onnx/verify_onnx.cpp index f21c45b6d59..88152602c59 100644 --- a/test/onnx/verify_onnx.cpp +++ 
b/test/onnx/verify_onnx.cpp @@ -351,6 +351,87 @@ TEST_CASE(depthtospace_simple_test) EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); } +TEST_CASE(dynamicquantizelinear_1d_test) +{ + auto p = migraphx::parse_onnx("dynamicquantizelinear_1d_test.onnx"); + p.compile(migraphx::make_target("ref")); + + std::vector data{0, 2, -3, -2.5, 1.34, 0.5}; + migraphx::shape s_x{migraphx::shape::float_type, {6}}; + migraphx::parameter_map pp; + pp["x"] = migraphx::argument(s_x, data.data()); + auto results = p.eval(pp); + + std::vector y_results; + results.at(0).visit([&](auto output) { y_results.assign(output.begin(), output.end()); }); + std::vector y_gold = {153, 255, 0, 26, 221, 179}; + EXPECT(migraphx::verify::verify_rms_range(y_results, y_gold)); + + std::vector y_scale; + results.at(1).visit([&](auto output) { y_scale.assign(output.begin(), output.end()); }); + std::vector y_scale_gold = {0.0196078438}; + EXPECT(migraphx::verify::verify_rms_range(y_scale, y_scale_gold)); + + std::vector y_zpt; + results.at(2).visit([&](auto output) { y_zpt.assign(output.begin(), output.end()); }); + std::vector y_zpt_gold = {153}; + EXPECT(migraphx::verify::verify_rms_range(y_zpt, y_zpt_gold)); +} + +TEST_CASE(dynamicquantizelinear_1d_max_adjusted_test) +{ + auto p = migraphx::parse_onnx("dynamicquantizelinear_1d_test.onnx"); + p.compile(migraphx::make_target("ref")); + + std::vector data{-1.0, -2.1, -1.3, -2.5, -3.34, -4.0}; + migraphx::shape s_x{migraphx::shape::float_type, {6}}; + migraphx::parameter_map pp; + pp["x"] = migraphx::argument(s_x, data.data()); + auto results = p.eval(pp); + + std::vector y_results; + results.at(0).visit([&](auto output) { y_results.assign(output.begin(), output.end()); }); + std::vector y_gold = {191, 121, 172, 96, 42, 0}; + EXPECT(migraphx::verify::verify_rms_range(y_results, y_gold)); + + std::vector y_scale; + results.at(1).visit([&](auto output) { y_scale.assign(output.begin(), output.end()); }); + std::vector y_scale_gold = 
{0.0156862754}; + EXPECT(migraphx::verify::verify_rms_range(y_scale, y_scale_gold)); + + std::vector y_zpt; + results.at(2).visit([&](auto output) { y_zpt.assign(output.begin(), output.end()); }); + std::vector y_zpt_gold = {255}; + EXPECT(migraphx::verify::verify_rms_range(y_zpt, y_zpt_gold)); +} + +TEST_CASE(dynamicquantizelinear_2d_test) +{ + auto p = migraphx::parse_onnx("dynamicquantizelinear_2d_test.onnx"); + p.compile(migraphx::make_target("ref")); + + std::vector data{1.0, 2.1, 1.3, 2.5, 3.34, 4.0, 1.5, 2.6, 3.9, 4.0, 3.0, 2.345}; + migraphx::shape s_x{migraphx::shape::float_type, {3, 4}}; + migraphx::parameter_map pp; + pp["x"] = migraphx::argument(s_x, data.data()); + auto results = p.eval(pp); + + std::vector y_results; + results.at(0).visit([&](auto output) { y_results.assign(output.begin(), output.end()); }); + std::vector y_gold = {64, 134, 83, 159, 213, 255, 96, 166, 249, 255, 191, 149}; + EXPECT(migraphx::verify::verify_rms_range(y_results, y_gold)); + + std::vector y_scale; + results.at(1).visit([&](auto output) { y_scale.assign(output.begin(), output.end()); }); + std::vector y_scale_gold = {0.0156862754}; + EXPECT(migraphx::verify::verify_rms_range(y_scale, y_scale_gold)); + + std::vector y_zpt; + results.at(2).visit([&](auto output) { y_zpt.assign(output.begin(), output.end()); }); + std::vector y_zpt_gold = {0}; + EXPECT(migraphx::verify::verify_rms_range(y_zpt, y_zpt_gold)); +} + TEST_CASE(spacetodepth_simple_test) { auto p = migraphx::parse_onnx("spacetodepth_simple_test.onnx"); diff --git a/test/py/onnx_backend_test.py b/test/py/onnx_backend_test.py index 016db336681..e226df40996 100644 --- a/test/py/onnx_backend_test.py +++ b/test/py/onnx_backend_test.py @@ -118,9 +118,6 @@ def disabled_tests_onnx_1_7_0(backend_test): backend_test.exclude(r'test_convtranspose_1d_cpu') backend_test.exclude(r'test_det_2d_cpu') backend_test.exclude(r'test_det_nd_cpu') - backend_test.exclude(r'test_dynamicquantizelinear_cpu') - 
backend_test.exclude(r'test_dynamicquantizelinear_max_adjusted_cpu') - backend_test.exclude(r'test_dynamicquantizelinear_min_adjusted_cpu') backend_test.exclude(r'test_edge_pad_cpu') backend_test.exclude(r'test_einsum_batch_diagonal_cpu') backend_test.exclude(r'test_einsum_batch_matmul_cpu')