From 07ecb34006f1cc3c3d2159aed67eb58eee6e83f9 Mon Sep 17 00:00:00 2001
From: Aleksei-grovety <113356454+Aleksei-grovety@users.noreply.github.com>
Date: Wed, 14 Feb 2024 17:39:38 +0400
Subject: [PATCH] [microNPU][ETHOSU] Add fixed point for matmul (#16401)

Add support for calculating matmul with 16-bit fixed point. Add the
enable_fixed_point flag to enable fixed-point calculation.
---
 .../relay/backend/contrib/ethosu/legalize.py | 50 +++++++++---
 python/tvm/relay/op/contrib/ethosu.py        | 76 ++++++++++++++++++-
 src/relay/op/contrib/ethosu/identity.cc      |  3 +-
 .../contrib/test_ethosu/test_codegen.py      | 51 +++++++++++++
 4 files changed, 168 insertions(+), 12 deletions(-)

diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py
index 7ed69e1e9bf8..457ad6e11ba3 100644
--- a/python/tvm/relay/backend/contrib/ethosu/legalize.py
+++ b/python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -1402,20 +1402,23 @@ def callback(self, pre, post, node_map):
         return ethosu_fc
 
 
-class MatMulRewriter(DFPatternCallback):
-    """Legalize matrix multiplication to an NPU operator"""
+class MatrixMultiplicationRewriter(DFPatternCallback):
+    """Legalize matrix multiplication with two tensors into sequence of NPU operators"""
 
-    def __init__(self):
+    def __init__(
+        self,
+        params_class: Type,
+        pattern: CallPattern,
+    ):
         super().__init__(require_type=True)
-        self.pattern = (
-            wildcard().has_attr({"Composite": ethosu_patterns.MatMulParams.composite_name})
-        )(wildcard(), wildcard())
+        self.pattern = pattern
+        self.params_class = params_class
 
     def callback(self, pre, post, node_map):
-        params = ethosu_patterns.MatMulParams(post.op.body)
+        params = self.params_class(post.op.body)
         ifm = post.args[0]
         ifm2 = post.args[1]
-        lut = relay.const([], dtype="int8")
+        lut = relay.const([], dtype=params.ifm.dtype)
         activation_map = {"clip": "CLIP"}
         if params.activation:
             activation = activation_map[params.activation.op.name]
@@ -1471,7 +1474,7 @@ def callback(self, pre, post, node_map):
                 rounding_mode="NATURAL",
             )
 
-            # Convert tensor dtype from int32 to int8
+            # Convert tensor dtype from int32 to output dtype
             scalar_tensor = relay.const(np.ones([1, 1, 1, 1], dtype="int32"), dtype="int32")
             reduce_sum = ethosu_ops.ethosu_binary_elementwise(
                 ifm=reduce_sum,
@@ -1487,7 +1490,7 @@ def callback(self, pre, post, node_map):
                 ifm_channels=1,
                 ifm2_channels=1,
                 reversed_operands=False,
-                ofm_dtype="int8",
+                ofm_dtype=params.ofm.dtype,
             )
             res_columns.append(reduce_sum)
 
@@ -1497,6 +1500,32 @@ def callback(self, pre, post, node_map):
         return relay.reshape(concat, params.ofm.shape)
 
 
+class MatMulRewriter(MatrixMultiplicationRewriter):
+    """Convert ethos-u.matmul composite function to sequence of NPU operators"""
+
+    def __init__(self):
+        super().__init__(
+            params_class=ethosu_patterns.MatMulParams,
+            pattern=(
+                wildcard().has_attr({"Composite": ethosu_patterns.MatMulParams.composite_name})
+            )(wildcard(), wildcard()),
+        )
+
+
+class MatMulFixedPointRewriter(MatrixMultiplicationRewriter):
+    """Convert ethos-u.matmul_fixed_point composite function to sequence of NPU operators"""
+
+    def __init__(self):
+        super().__init__(
+            params_class=ethosu_patterns.MatMulFixedPointParams,
+            pattern=(
+                wildcard().has_attr(
+                    {"Composite": ethosu_patterns.MatMulFixedPointParams.composite_name}
+                )
+            )(wildcard(), wildcard()),
+        )
+
+
 class PadRewriter(DFPatternCallback):
     """Convert ethos-u.pad2d composite function to ethosu_depthwise_conv2d operator"""
 
@@ -1644,6 +1673,7 @@ def transform_npu_function(self, _, func: relay.Function) -> relay.Function:
             PartitionedSplitRewriter(),
             FullyConnectedRewriter(),
             MatMulRewriter(),
+            MatMulFixedPointRewriter(),
             SplitRewriter(),
             ChannelPadRewriter(),
             Conv2DRewriter(),
diff --git a/python/tvm/relay/op/contrib/ethosu.py b/python/tvm/relay/op/contrib/ethosu.py
index 73cee9d0cd23..f24538242cf9 100644
--- a/python/tvm/relay/op/contrib/ethosu.py
+++ b/python/tvm/relay/op/contrib/ethosu.py
@@ -1917,7 +1917,7 @@ def is_valid(self) -> bool:
         Checks whether matrix multiplication has compatible attributes with HW
         """
 
-        if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8]):
+        if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int8, np.int16]):
             return False
         if not len(self.ifm.shape) == 2:
             return False
@@ -1938,6 +1938,75 @@ def matmul_pattern():
     return optional_clip
 
 
+class MatMulFixedPointParams:
+    """
+    This class will parse a call to an ethos-u.matmul_fixed_point composite
+    function and extract the parameter information.
+    """
+
+    composite_name = "ethos-u.matmul_fixed_point"
+
+    @requires_vela
+    def __init__(self, func_body):
+        from tvm.relay.backend.contrib.ethosu.util import QDenseArgs
+
+        dense_fixed_point = func_body.args[0]
+        dense = dense_fixed_point.args[0]
+        # fixed_point_multiply relay operation uses multiplier with 31 fractional bits
+        # so to determine the size of the fraction use the formula: 31 - shift
+        self.fraction_size = 31 - dense_fixed_point.attrs.shift
+        fract_scale = tvm.relay.Constant(tvm.nd.array(np.array(1 / 2**self.fraction_size)))
+        fract_zero_point = tvm.relay.Constant(tvm.nd.array(np.array(0, dtype="int32")))
+
+        self.activation = None
+        self.weights = TensorParams(
+            dense.args[QDenseArgs.WEIGHTS.value].args[0].args[0],
+            None,
+            fract_scale,
+            fract_zero_point,
+        )
+        self.ifm = TensorParams(
+            dense.args[QDenseArgs.IFM.value].args[0].args[0],
+            None,
+            fract_scale,
+            fract_zero_point,
+        )
+        self.ofm = TensorParams(
+            func_body,
+            None,
+            fract_scale,
+            fract_zero_point,
+        )
+
+    def is_valid(self) -> bool:
+        """
+        Checks whether matrix multiplication has compatible attributes with HW
+        """
+
+        if self.fraction_size < 0 or self.fraction_size > 16:
+            return False
+        if not check_valid_dtypes([self.ifm, self.ofm], supported_dtypes=[np.int16]):
+            return False
+        if not len(self.ifm.shape) == 2:
+            return False
+        if not len(self.ofm.shape) == 2:
+            return False
+        # The weights must be transposed
+        if self.ifm.shape[1] != self.weights.shape[1]:
+            return False
+        return True
+
+
+def matmul_fixed_point_pattern():
+    ifm = is_op("cast")(wildcard())
+    ifm2 = is_op("cast")(wildcard())
+    ifm = is_op("fixed_point_multiply")(ifm)
+    ifm2 = is_op("fixed_point_multiply")(ifm2)
+    dense = is_op("nn.dense")(ifm, ifm2)
+    dense = is_op("fixed_point_multiply")(dense)
+    return is_op("cast")(dense)
+
+
 class HardSwishParams:
     """
     This class will parse a call to a ethos-u.hard_swish composite function
@@ -2228,6 +2297,11 @@ def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Cal
             matmul_pattern(),
             lambda pat: MatMulParams(pat).is_valid(),
         ),
+        (
+            MatMulFixedPointParams.composite_name,
+            matmul_fixed_point_pattern(),
+            lambda pat: MatMulFixedPointParams(pat).is_valid(),
+        ),
         (
             MaxPool2DParams.composite_name,
             qnn_maxpool2d_pattern(),
diff --git a/src/relay/op/contrib/ethosu/identity.cc b/src/relay/op/contrib/ethosu/identity.cc
index f808e8c21902..9ec6c6f42ce0 100644
--- a/src/relay/op/contrib/ethosu/identity.cc
+++ b/src/relay/op/contrib/ethosu/identity.cc
@@ -46,7 +46,8 @@ bool EthosuIdentityRel(const Array<Type>& types, int num_inputs, const Attrs& at
   const String operator_name = "ethosu_identity";
 
-  CheckDataType(reporter, ifm->dtype, {DataType::UInt(8), DataType::Int(8)}, operator_name, "ifm");
+  CheckDataType(reporter, ifm->dtype, {DataType::UInt(8), DataType::Int(8), DataType::Int(16)},
+                operator_name, "ifm");
 
   if (ifm->shape.size() > 4) {
     reporter->GetDiagCtx().EmitFatal(
diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py
index f69c114cabd1..afcf27bb4517 100644
--- a/tests/python/contrib/test_ethosu/test_codegen.py
+++ b/tests/python/contrib/test_ethosu/test_codegen.py
@@ -1624,5 +1624,56 @@ def subtract_sigmoid_function(lhs, rhs):
     )
 
 
+@pytest.mark.parametrize("accel_type", ["ethos-u55-256", "ethos-u65-256"])
+@pytest.mark.parametrize(
+    "ifm_shape,ofm_channels,fract_size,tolerance",
+    [[(1, 16), 8, 15, 0.001], [(2, 8), 16, 14, 0.001], [(4, 8), 16, 12, 0.001]],
+)
+def test_ethosu_matmul_fixed_point(accel_type, ifm_shape, ofm_channels, fract_size, tolerance):
+    np.random.seed(0)
+    dtype = "int16"
+    weights_shape = (ofm_channels, ifm_shape[1])
+
+    def create_model():
+        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
+        ifm2 = relay.var("ifm2", shape=weights_shape, dtype=dtype)
+        ifm_fixed_point = relay.cast(ifm, "int32")
+        ifm2_fixed_point = relay.cast(ifm2, "int32")
+        ifm_fixed_point = relay.fixed_point_multiply(ifm_fixed_point, 2**31 - 1, 0)
+        ifm2_fixed_point = relay.fixed_point_multiply(ifm2_fixed_point, 2**31 - 1, 0)
+        dense = relay.nn.dense(ifm_fixed_point, ifm2_fixed_point)
+        dense = relay.fixed_point_multiply(dense, 1, 16)
+        dense = relay.cast(dense, dtype)
+        return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], dense))
+
+    def convert_to_fixed_point(arr, fract_size):
+        fract_fact = 0b1 << fract_size
+        return np.array(arr * fract_fact, dtype=np.int16)
+
+    cpu_mod = create_model()
+    ethosu_mod = partition_for_ethosu(cpu_mod)
+
+    input_data = {
+        "ifm": np.random.uniform(-0.5, 0.5, size=ifm_shape),
+        "ifm2": np.random.uniform(-0.5, 0.5, size=weights_shape),
+    }
+    input_data = {
+        "ifm": convert_to_fixed_point(input_data["ifm"], fract_size),
+        "ifm2": convert_to_fixed_point(input_data["ifm2"], fract_size),
+    }
+    output_data = generate_ref_data(cpu_mod, input_data)
+    output_data = {"output": output_data["output"].astype("int16")}
+    tolerance = convert_to_fixed_point(tolerance, fract_size)
+
+    infra.compare_ethosu_with_reference(
+        ethosu_mod,
+        input_data,
+        output_data,
+        accel_type,
+        enable_cascader=False,
+        output_tolerance=tolerance,
+    )
+
+
 if __name__ == "__main__":
     tvm.testing.main()