Commit 5fe1b07 — Add DynamicQuantizeLinear op (#2489)

gyulaz-htec authored Dec 12, 2023
1 parent 7e61114 commit 5fe1b07
Showing 7 changed files with 352 additions and 7 deletions.
151 changes: 151 additions & 0 deletions src/onnx/parse_dynamicquantizelinear.cpp
@@ -0,0 +1,151 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/common.hpp>
#include <migraphx/onnx/broadcast_qdq.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

/*
*********************************************************************************
* Reference: see DynamicQuantizeLinear in *
* https://github.com/onnx/onnx/blob/main/docs/Operators.md *
*********************************************************************************
DynamicQuantizeLinear
A Function to fuse calculation for Scale, Zero Point and FP32->8Bit conversion of FP32 Input data.
Outputs Scale, ZeroPoint and Quantized Input for a given FP32 Input. Scale is calculated as:
y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin)
* where qmax and qmin are max and min values for quantization range i.e. [0, 255] in case of uint8
* data range is adjusted to include 0.
Zero point is calculated as:
intermediate_zero_point = qmin - min(x)/y_scale
y_zero_point = cast(round(saturate(intermediate_zero_point)))
* where qmax and qmin are max and min values for the quantization range, i.e. [0, 255] in case of uint8
* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now
only uint8 is supported.
* rounding to nearest ties to even. Data quantization formula is:
y = saturate (round (x / y_scale) + y_zero_point)
* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now only
uint8 is supported.
* rounding to nearest ties to even.
Version
This version of the operator has been available since version 11 of the default ONNX operator set.
Inputs
x : T1
Input tensor
Outputs
y : T2
Quantized output tensor
y_scale : tensor(float)
Output scale. It's a scalar, which means a per-tensor/layer quantization.
y_zero_point : T2
Output zero point. It's a scalar, which means a per-tensor/layer quantization.
Type Constraints
T1 : tensor(float)
Constrain 'x' to float tensor.
T2 : tensor(uint8)
Constrain 'y_zero_point' and 'y' to 8-bit unsigned integer tensor.
*/

struct parse_dynamicquantizelinear : op_parser<parse_dynamicquantizelinear>
{
std::vector<op_desc> operators() const { return {{"DynamicQuantizeLinear"}}; }

std::vector<instruction_ref> parse(const op_desc& /*opd*/,
const onnx_parser& /*parser*/,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
{
auto x = args[0];
auto x_shape = x->get_shape();
auto x_type = x_shape.type();
if(x_shape.dynamic())
MIGRAPHX_THROW("DYNAMICQUANTIZELINEAR: dynamic shapes are not supported");

auto x_reshaped =
(x_shape.lens().size() == 1)
? x
: info.add_instruction(
migraphx::make_op("reshape", {{"dims", {x_shape.elements()}}}), x);

auto lit_0 = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0}});
x_reshaped =
info.add_instruction(migraphx::make_op("concat", {{"axis", 0}}), x_reshaped, lit_0);

// 1. Computing y_scale
// Note: currently, DynamicQuantizeLinear only has uint8 quantization:
const auto Q_MAX = std::numeric_limits<uint8_t>::max();
const auto Q_MIN = std::numeric_limits<uint8_t>::min();

auto q_range =
info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MAX - Q_MIN}});

// maximum(0, max(x))
auto max_x =
info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {0}}}), x_reshaped);
// minimum(0, min(x))
auto min_x =
info.add_instruction(migraphx::make_op("reduce_min", {{"axes", {0}}}), x_reshaped);

// y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin)
auto sub0 = info.add_common_op("sub", max_x, min_x);
auto y_scale = info.add_common_op("div", sub0, q_range);

// 2. Computing y_zero_point
// intermediate_zero_point = qmin - min(x) / y_scale
auto q_min = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MIN}});
auto q_max = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MAX}});
auto sub1 = info.add_common_op("sub", q_min, min_x);
auto interm_zp = info.add_common_op("div", sub1, y_scale);
// y_zero_point = cast(round(saturate(intermediate_zero_point)))
auto saturate = info.add_instruction(migraphx::make_op("clip"), interm_zp, q_min, q_max);
auto round = info.add_instruction(migraphx::make_op("nearbyint"), saturate);
auto y_zero_point = info.add_instruction(
migraphx::make_op("convert", {{"target_type", migraphx::shape::uint8_type}}), round);

// 3. quantize x with y_scale and y_zero_point
auto quant = bcast_qdq_instr("quantizelinear", x, y_scale, y_zero_point, info);

return {quant, y_scale, y_zero_point};
}
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
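
For reference, the instruction graph built by this parser computes the following, shown here as a NumPy sketch (an illustration of the reference math only, not of any MIGraphX API; np.rint rounds half to even, matching nearbyint):

import numpy as np

def dynamic_quantize_linear(x):
    # Appending 0 before the reductions implements maximum(0, max(x)) and
    # minimum(0, min(x)) in a single reduce, mirroring the concat trick above.
    flat = np.concatenate([x.reshape(-1), [0.0]]).astype(np.float32)
    q_min, q_max = 0.0, 255.0  # uint8 quantization range
    y_scale = (flat.max() - flat.min()) / (q_max - q_min)
    # intermediate_zero_point = qmin - min(x) / y_scale, then saturate, round, cast
    interm_zp = (q_min - flat.min()) / y_scale
    y_zero_point = np.rint(np.clip(interm_zp, q_min, q_max)).astype(np.uint8)
    # y = saturate(round(x / y_scale) + y_zero_point)
    y = np.clip(np.rint(x / y_scale) + y_zero_point, q_min, q_max).astype(np.uint8)
    return y, np.float32(y_scale), y_zero_point

y, scale, zp = dynamic_quantize_linear(np.array([0, 2, -3, -2.5, 1.34, 0.5], dtype=np.float32))
print(scale, zp)  # ~0.0196078438 and 153 for this input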
19 changes: 19 additions & 0 deletions test/onnx/dynamicquantizelinear_1d_test.onnx
@@ -0,0 +1,19 @@
(binary ONNX protobuf: graph "dynamicquantizelinear_1d_test" containing a single DynamicQuantizeLinear node with input x (float[6]) and outputs y (uint8[6]), y_scale (float[1]), y_zero_point (uint8[1]))
19 changes: 19 additions & 0 deletions test/onnx/dynamicquantizelinear_2d_test.onnx
@@ -0,0 +1,19 @@
(binary ONNX protobuf: graph "dynamicquantizelinear_2d_test" containing a single DynamicQuantizeLinear node with input x (float[3,4]) and outputs y (uint8[3,4]), y_scale (float[1]), y_zero_point (uint8[1]))
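
Both .onnx fixtures above are binary protobufs. If needed, their graphs can be inspected with the onnx Python package, for example:

import onnx

model = onnx.load("test/onnx/dynamicquantizelinear_1d_test.onnx")
print(onnx.helper.printable_graph(model.graph))
# expect a single DynamicQuantizeLinear node: x -> (y, y_scale, y_zero_point)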
34 changes: 34 additions & 0 deletions test/onnx/gen_onnx.py
@@ -1968,6 +1968,40 @@ def dropout_test():
return ([node], [x], [y])


@onnx_test()
def dynamicquantizelinear_1d_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [6])
y = helper.make_tensor_value_info('y', TensorProto.UINT8, [6])
y_scale = helper.make_tensor_value_info('y_scale', TensorProto.FLOAT, [1])
y_zero_point = helper.make_tensor_value_info('y_zero_point',
TensorProto.UINT8, [1])

node = onnx.helper.make_node(
'DynamicQuantizeLinear',
inputs=['x'],
outputs=['y', 'y_scale', 'y_zero_point'],
)

return ([node], [x], [y, y_scale, y_zero_point])


@onnx_test()
def dynamicquantizelinear_2d_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
y = helper.make_tensor_value_info('y', TensorProto.UINT8, [3, 4])
y_scale = helper.make_tensor_value_info('y_scale', TensorProto.FLOAT, [1])
y_zero_point = helper.make_tensor_value_info('y_zero_point',
TensorProto.UINT8, [1])

node = onnx.helper.make_node(
'DynamicQuantizeLinear',
inputs=['x'],
outputs=['y', 'y_scale', 'y_zero_point'],
)

return ([node], [x], [y, y_scale, y_zero_point])


@onnx_test()
def elu_test():
x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
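Assuming onnxruntime is installed (it is not a dependency of this commit), the two dynamicquantizelinear models generated above can also be run against its implementation of the operator to cross-check the gold values used in verify_onnx.cpp below:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("dynamicquantizelinear_1d_test.onnx",
                            providers=["CPUExecutionProvider"])
x = np.array([0, 2, -3, -2.5, 1.34, 0.5], dtype=np.float32)
y, y_scale, y_zero_point = sess.run(None, {"x": x})
print(y, y_scale, y_zero_point)
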
52 changes: 48 additions & 4 deletions test/onnx/onnx_test.cpp
@@ -1865,6 +1865,50 @@ TEST_CASE(depthtospace_simple_test)
EXPECT(p == prog);
}

TEST_CASE(dynamicquantizelinear_2d_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
auto x_dims = {3, 4};
auto x_type = migraphx::shape::float_type;
auto x = mm->add_parameter("x", {x_type, x_dims});

auto l0 = mm->add_literal({0.f});
auto x_reshaped = mm->add_instruction(migraphx::make_op("reshape", {{"dims", {12}}}), x);
x_reshaped = mm->add_instruction(migraphx::make_op("concat", {{"axis", 0}}), x_reshaped, l0);

auto q_range = mm->add_literal(
migraphx::literal{migraphx::shape{x_type}, {std::numeric_limits<uint8_t>::max()}});

auto max_x = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {0}}}), x_reshaped);
auto min_x = mm->add_instruction(migraphx::make_op("reduce_min", {{"axes", {0}}}), x_reshaped);

auto sub0 = mm->add_instruction(migraphx::make_op("sub"), max_x, min_x);
auto y_scale = mm->add_instruction(migraphx::make_op("div"), sub0, q_range);

auto q_min = mm->add_literal(
migraphx::literal{migraphx::shape{x_type}, {std::numeric_limits<uint8_t>::min()}});
auto q_max = mm->add_literal(
migraphx::literal{migraphx::shape{x_type}, {std::numeric_limits<uint8_t>::max()}});
auto sub1 = mm->add_instruction(migraphx::make_op("sub"), q_min, min_x);
auto interm_zp = mm->add_instruction(migraphx::make_op("div"), sub1, y_scale);
auto saturate = mm->add_instruction(migraphx::make_op("clip"), interm_zp, q_min, q_max);
auto round = mm->add_instruction(migraphx::make_op("nearbyint"), saturate);
auto y_zero_point = mm->add_instruction(
migraphx::make_op("convert", {{"target_type", migraphx::shape::uint8_type}}), round);

auto scale_y_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", x_dims}}), y_scale);

auto y_pt_c_bcast = mm->add_instruction(
migraphx::make_op("multibroadcast", {{"out_lens", x_dims}}), y_zero_point);

mm->add_instruction(migraphx::make_op("quantizelinear"), x, scale_y_bcast, y_pt_c_bcast);

auto prog = optimize_onnx("dynamicquantizelinear_2d_test.onnx");
EXPECT(p == prog);
}

TEST_CASE(spacetodepth_test)
{
migraphx::program p;
@@ -2863,12 +2907,12 @@ migraphx::program make_group_norm(const std::vector<int64_t>& input_dims,

auto eps = mm->add_literal(migraphx::literal{dtype, {eps_value}});

-auto x_reshaped =
+auto x_reshapedd =
 mm->add_instruction(migraphx::make_op("reshape", {{"dims", reshape_dims}}), x);
 auto mean =
-mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", reduce_axes}}), x_reshaped);
-auto x_sub_mean = add_common_op(*mm, migraphx::make_op("sub"), {x_reshaped, mean});
-auto x_sqdiff_mean = add_common_op(*mm, migraphx::make_op("sqdiff"), {x_reshaped, mean});
+mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", reduce_axes}}), x_reshapedd);
+auto x_sub_mean = add_common_op(*mm, migraphx::make_op("sub"), {x_reshapedd, mean});
+auto x_sqdiff_mean = add_common_op(*mm, migraphx::make_op("sqdiff"), {x_reshapedd, mean});
auto var = mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", reduce_axes}}),
x_sqdiff_mean);
auto var_eps = add_common_op(*mm, migraphx::make_op("add"), {var, eps});
81 changes: 81 additions & 0 deletions test/onnx/verify_onnx.cpp
@@ -351,6 +351,87 @@ TEST_CASE(depthtospace_simple_test)
EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
}

TEST_CASE(dynamicquantizelinear_1d_test)
{
auto p = migraphx::parse_onnx("dynamicquantizelinear_1d_test.onnx");
p.compile(migraphx::make_target("ref"));

std::vector<float> data{0, 2, -3, -2.5, 1.34, 0.5};
migraphx::shape s_x{migraphx::shape::float_type, {6}};
migraphx::parameter_map pp;
pp["x"] = migraphx::argument(s_x, data.data());
auto results = p.eval(pp);

std::vector<uint8_t> y_results;
results.at(0).visit([&](auto output) { y_results.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_gold = {153, 255, 0, 26, 221, 179};
EXPECT(migraphx::verify::verify_rms_range(y_results, y_gold));

std::vector<float> y_scale;
results.at(1).visit([&](auto output) { y_scale.assign(output.begin(), output.end()); });
std::vector<float> y_scale_gold = {0.0196078438};
EXPECT(migraphx::verify::verify_rms_range(y_scale, y_scale_gold));

std::vector<uint8_t> y_zpt;
results.at(2).visit([&](auto output) { y_zpt.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_zpt_gold = {153};
EXPECT(migraphx::verify::verify_rms_range(y_zpt, y_zpt_gold));
}

TEST_CASE(dynamicquantizelinear_1d_max_adjusted_test)
{
auto p = migraphx::parse_onnx("dynamicquantizelinear_1d_test.onnx");
p.compile(migraphx::make_target("ref"));

std::vector<float> data{-1.0, -2.1, -1.3, -2.5, -3.34, -4.0};
migraphx::shape s_x{migraphx::shape::float_type, {6}};
migraphx::parameter_map pp;
pp["x"] = migraphx::argument(s_x, data.data());
auto results = p.eval(pp);

std::vector<uint8_t> y_results;
results.at(0).visit([&](auto output) { y_results.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_gold = {191, 121, 172, 96, 42, 0};
EXPECT(migraphx::verify::verify_rms_range(y_results, y_gold));

std::vector<float> y_scale;
results.at(1).visit([&](auto output) { y_scale.assign(output.begin(), output.end()); });
std::vector<float> y_scale_gold = {0.0156862754};
EXPECT(migraphx::verify::verify_rms_range(y_scale, y_scale_gold));

std::vector<uint8_t> y_zpt;
results.at(2).visit([&](auto output) { y_zpt.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_zpt_gold = {255};
EXPECT(migraphx::verify::verify_rms_range(y_zpt, y_zpt_gold));
}

TEST_CASE(dynamicquantizelinear_2d_test)
{
auto p = migraphx::parse_onnx("dynamicquantizelinear_2d_test.onnx");
p.compile(migraphx::make_target("ref"));

std::vector<float> data{1.0, 2.1, 1.3, 2.5, 3.34, 4.0, 1.5, 2.6, 3.9, 4.0, 3.0, 2.345};
migraphx::shape s_x{migraphx::shape::float_type, {3, 4}};
migraphx::parameter_map pp;
pp["x"] = migraphx::argument(s_x, data.data());
auto results = p.eval(pp);

std::vector<uint8_t> y_results;
results.at(0).visit([&](auto output) { y_results.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_gold = {64, 134, 83, 159, 213, 255, 96, 166, 249, 255, 191, 149};
EXPECT(migraphx::verify::verify_rms_range(y_results, y_gold));

std::vector<float> y_scale;
results.at(1).visit([&](auto output) { y_scale.assign(output.begin(), output.end()); });
std::vector<float> y_scale_gold = {0.0156862754};
EXPECT(migraphx::verify::verify_rms_range(y_scale, y_scale_gold));

std::vector<uint8_t> y_zpt;
results.at(2).visit([&](auto output) { y_zpt.assign(output.begin(), output.end()); });
std::vector<uint8_t> y_zpt_gold = {0};
EXPECT(migraphx::verify::verify_rms_range(y_zpt, y_zpt_gold));
}
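
The gold y_scale and y_zero_point values in the three tests above follow directly from the formulas in the parser's header comment; a quick arithmetic check (a sketch, not part of the test suite):

import numpy as np

def scale_and_zp(x):
    # the data range is adjusted to include 0, per the spec
    lo, hi = min(0.0, min(x)), max(0.0, max(x))
    y_scale = np.float32((hi - lo) / 255)
    y_zero_point = int(np.rint(np.clip((0.0 - lo) / y_scale, 0, 255)))
    return y_scale, y_zero_point

print(scale_and_zp([0, 2, -3, -2.5, 1.34, 0.5]))            # ~0.0196078438, 153
print(scale_and_zp([-1.0, -2.1, -1.3, -2.5, -3.34, -4.0]))  # ~0.0156862754, 255 (max adjusted to 0)
print(scale_and_zp([1.0, 2.1, 1.3, 2.5, 3.34, 4.0,
                    1.5, 2.6, 3.9, 4.0, 3.0, 2.345]))       # ~0.0156862754, 0 (min adjusted to 0)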

TEST_CASE(spacetodepth_simple_test)
{
auto p = migraphx::parse_onnx("spacetodepth_simple_test.onnx");