Skip to content

Commit

Permalink
q_linearadd operator
Browse files Browse the repository at this point in the history
  • Loading branch information
lakhinderwalia committed Oct 2, 2023
1 parent dcc7b0a commit 4ee4d69
Show file tree
Hide file tree
Showing 4 changed files with 268 additions and 0 deletions.
202 changes: 202 additions & 0 deletions src/onnx/parse_qlinearadd.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/instruction.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

/*
*********************************************************************************
* Reference: see QLinearAdd in *
* https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md *
*********************************************************************************
com.microsoft.QLinearAdd
Performs element-wise binary addition on 8 bit data types (with Numpy-style broadcasting support).
C = (A_scale * (A - A_zero_point) + B_scale * (B - B_zero_point))/C_scale + C_zero_point
Version
This version of the operator has been available since version 1 of the 'com.microsoft' operator
set.
Inputs (7 - 8)
A : T
First operand.
A_scale : tensor(float)
Input A's scale. It's a scalar, which means a per-tensor/layer quantization.
A_zero_point (optional) : T
Input A zero point. Default value is 0 if it's not specified. It's a scalar, which means a
per-tensor/layer quantization.
B : T
Second operand.
B_scale : tensor(float)
Input B's scale. It's a scalar, which means a per-tensor/layer quantization.
B_zero_point (optional) : T
Input B zero point. Default value is 0 if it's not specified. It's a scalar, which means a
per-tensor/layer quantization.
C_scale : tensor(float)
Output scale. It's a scalar, which means a per-tensor/layer quantization.
C_zero_point (optional) : T
Output zero point. Default value is 0 if it's not specified. It's a scalar, which means a
per-tensor/layer quantization.
Outputs
C : T
Result, has same element type as two inputs
Type Constraints
T : tensor(uint8), tensor(int8)
Constrain input and output types to 8 bit signed and unsigned tensors.
*/

struct parse_qlinearadd : op_parser<parse_qlinearadd>
{
std::vector<op_desc> operators() const { return {{"QLinearAdd"}}; }

// basic type checking for QLinearAdd Operator
void check_inputs(const std::vector<instruction_ref>& args) const
{
if(args.size() < 7)
MIGRAPHX_THROW("QLINEARADD: missing inputs");

const auto& in_a = args[0];
const auto& in_b = args[3];

auto sh_a = in_a->get_shape();
auto sh_b = in_b->get_shape();
if(sh_a != sh_b)
MIGRAPHX_THROW("QLINEARADD: mismatched input shapes");

auto type_a = sh_a.type();
auto type_b = sh_b.type();
if(type_a != migraphx::shape::int8_type and type_a != migraphx::shape::uint8_type)
MIGRAPHX_THROW("QLINEARADD: unsupported input type");
if(type_b != migraphx::shape::int8_type and type_b != migraphx::shape::uint8_type)
MIGRAPHX_THROW("QLINEARADD: unsupported input type");
}

instruction_ref bcast_scalar_instr(const migraphx::shape& shape_out,
const instruction_ref& arg_in,
const onnx_parser::node_info& info) const
{
auto bcast_instr_out = info.add_instruction(
migraphx::make_op("multibroadcast", {{"out_lens", shape_out.lens()}}), arg_in);
return bcast_instr_out;
}

// This method is to prep for quantizelinear or dequantizelinear operation for
// either the broadcasting of weight-scale or zero-points of qlinearadd operator
// outputs: operator op (inputs x, broadcasted: scale (float) & zero_pt (8-bit))
instruction_ref bcast_qdq_instr(const std::string& op_name,
const instruction_ref& x_in,
const instruction_ref& arg_fscale,
const instruction_ref& arg_z_pt,
const onnx_parser::node_info& info) const
{
auto in_lens = x_in->get_shape().lens();

// prep 1: broadcast scale. it can come as a scalar or a 1-D tensor.
std::vector<float> sc_val;
auto ev_arg_fscale = arg_fscale->eval();
ev_arg_fscale.visit([&](auto s) { sc_val.assign(s.begin(), s.end()); });
shape sh_scale = {shape::float_type, {sc_val.size()}};
instruction_ref bcast_scale;
if(sc_val.size() > 1)
bcast_scale = info.add_instruction(
migraphx::make_op("broadcast", {{"axis", 0}, {"out_lens", in_lens}}),
info.add_literal(sh_scale, sc_val));
else
bcast_scale =
info.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", in_lens}}),
info.add_literal(sh_scale, sc_val));

// prep 2: broadcast zero point. it can come as a scalar or a 1-D tensor.
std::vector<int> z_pt_val;
auto ev_arg_z_pt = arg_z_pt->eval();
ev_arg_z_pt.visit([&](auto z) { z_pt_val.assign(z.begin(), z.end()); });
instruction_ref bcast_zero_pt;
if(z_pt_val.size() > 1)
bcast_zero_pt = info.add_instruction(
migraphx::make_op("broadcast", {{"axis", 0}, {"out_lens", in_lens}}), arg_z_pt);
else
bcast_zero_pt = info.add_instruction(
migraphx::make_op("multibroadcast", {{"out_lens", in_lens}}), arg_z_pt);

// op_name is either quantizelinear or dequantizelinear:
return info.add_instruction(migraphx::make_op(op_name), x_in, bcast_scale, bcast_zero_pt);
}

instruction_ref parse(const op_desc& /* opd */,
const onnx_parser& /*parser*/,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
{
check_inputs(args);

// A
const auto& in_a = args[0];
const auto& in_scale_a = args[1];
const auto& in_zero_pt_a = args[2];
auto dquant_a = bcast_qdq_instr("dequantizelinear", in_a, in_scale_a, in_zero_pt_a, info);

// B
const auto& in_b = args[3];
const auto& in_scale_b = args[4];
const auto& in_zero_pt_b = args[5];
auto dquant_b = bcast_qdq_instr("dequantizelinear", in_b, in_scale_b, in_zero_pt_b, info);

// C = A + B
auto out_c = info.add_instruction(migraphx::make_op("add"), dquant_a, dquant_b);

const auto& in_scale_c = args[6];

// zero_pt for C is supplied as the last optional argument..
if(args.size() == 8)
return (bcast_qdq_instr("quantizelinear", out_c, in_scale_c, args[7], info));

// if no zero_pt: just broadcast the scale..
auto bcast_scale_c = bcast_scalar_instr(out_c->get_shape(), in_scale_c, info);
return (info.add_instruction(migraphx::make_op("quantizelinear"), out_c, bcast_scale_c));
}
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
30 changes: 30 additions & 0 deletions test/onnx/gen_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@


def onnx_test(external_data=False):

def create_onnx_test(op_test):

def run_test():
op_info = op_test()
if len(op_info) > 3:
Expand Down Expand Up @@ -5017,6 +5019,34 @@ def prelu_brcst_test():
return ([node], [arg0, arg1], [arg_out])


@onnx_test()
def qlinearadd_test():
a = helper.make_tensor_value_info('A', TensorProto.UINT8, [64])
sc_a = helper.make_tensor('A_scale', TensorProto.FLOAT, [], [0.05])
zero_pt_a = helper.make_tensor('A_zero_point', TensorProto.UINT8, [], [0])

b = helper.make_tensor_value_info('B', TensorProto.UINT8, [64])
sc_b = helper.make_tensor('B_scale', TensorProto.FLOAT, [], [0.05])
zero_pt_b = helper.make_tensor('B_zero_point', TensorProto.UINT8, [],
[128])

sc_c = helper.make_tensor('C_scale', TensorProto.FLOAT, [], [0.05])
zero_pt_c = helper.make_tensor('C_zero_point', TensorProto.UINT8, [], [64])

c = helper.make_tensor_value_info('C', TensorProto.UINT8, [64])

node = onnx.helper.make_node(
'QLinearAdd',
inputs=[
'A', 'A_scale', 'A_zero_point', 'B', 'B_scale', 'B_zero_point',
'C_scale', 'C_zero_point'
],
outputs=['C'],
)
return ([node], [a, b], [c],
[sc_a, zero_pt_a, sc_b, zero_pt_b, sc_c, zero_pt_c])


@onnx_test()
def quantizelinear_test():
arg0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [5])
Expand Down
Binary file added test/onnx/qlinearadd_test.onnx
Binary file not shown.
36 changes: 36 additions & 0 deletions test/onnx/verify_onnx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,42 @@ TEST_CASE(nonzero_test)
EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
}

TEST_CASE(qlinearadd_test)
{
// github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.QLinearAdd
migraphx::program p = migraphx::parse_onnx("qlinearadd_test.onnx");
p.compile(migraphx::make_target("ref"));

migraphx::shape a{migraphx::shape::uint8_type, {64}};
std::vector<unsigned char> data_a = {
0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126};

migraphx::shape b{migraphx::shape::uint8_type, {64}};
std::vector<unsigned char> data_b = {
128, 126, 124, 122, 120, 118, 116, 114, 112, 110, 108, 106, 104, 102, 100, 98,
96, 94, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 72, 70, 68, 66,
64, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34,
32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2};

migraphx::parameter_map pp;
pp["A"] = migraphx::argument(a, data_a.data());
pp["B"] = migraphx::argument(b, data_b.data());
auto result = p.eval(pp).back();

std::vector<unsigned char> result_vector;
result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });

std::vector<unsigned char> gold = {
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64};

EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
}

TEST_CASE(resize_downsample_f_test)
{
migraphx::program p = migraphx::parse_onnx("resize_downsample_f_test.onnx");
Expand Down

0 comments on commit 4ee4d69

Please sign in to comment.