From 1cccc3b5d65cae743a2becb7e256c05897af29ca Mon Sep 17 00:00:00 2001
From: Eric Lunderberg
Date: Fri, 22 Mar 2024 16:36:16 -0500
Subject: [PATCH] [SLM] Allow modules to define pre-processing of weights (#16757)

* [SLM] Allow TensorStructInfo to specify parameter in export

Prior to this commit, the parameter specification for an SLM tensor
needed to be passed as a `nn.spec.Tensor`.  As this object is only
used to construct a `relax.TensorStructInfo`, and has the same fields
as a `relax.TensorStructInfo`, this commit allows the parameter
specification to be passed as a `relax.TensorStructInfo`.

* Resolve breakage in unit tests

* [SLM] Use `CopyWithNewVars` to de-duplicate symbolic variables

Prior to this commit, a `nn.spec.Tensor`'s shape had special handling
to ensure that symbolic variables were not reused across multiple
functions.  This commit updates this handling to instead use the
`CopyWithNewVars` function.

* [SLM] Allow modules to define pre-processing of weights

Prior to this commit, the weights used by `nn.Module` instances were
required to be `nn.Parameter` instances.  This commit allows the
weights to instead be `nn.Tensor` instances, defined in terms of
other `nn.Parameter` weights.  This allows a model to define both the
original weights that would be present in an external checkpoint
(e.g. a PyTorch or Safetensors file) and the pre-processing that
should be performed on those weights.

* Undo portions that would introduce R.Tensor to nn.Module

* Remove unit tests that were related to TensorStructInfo
---
 python/tvm/relax/frontend/nn/core.py          |  17 +-
 python/tvm/relax/frontend/nn/exporter.py      |  40 +-
 .../python/relax/test_frontend_nn_exporter.py | 443 ++++++++++++++++++
 .../relax/test_frontend_nn_extern_module.py   |  10 +-
 .../python/relax/test_frontend_nn_modules.py  |   3 +-
 tests/python/relax/test_frontend_nn_op.py     |  27 +-
 .../python/relax/test_frontend_nn_packing.py  |   3 +-
 .../relax/test_frontend_nn_subroutines.py     |  13 +-
 8 files changed, 498 insertions(+), 58 deletions(-)
 create mode 100644 tests/python/relax/test_frontend_nn_exporter.py
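Notes (not part of the applied diff): a minimal sketch of the workflow this
patch enables.  The `ConcatProj` module and its names are hypothetical, but
the calls mirror the new test file added below.

    from tvm.relax.frontend import nn

    class ConcatProj(nn.Module):
        """Hypothetical module whose runtime weight derives from two checkpoint weights."""

        def __init__(self, features: int):
            # `a` and `b` are the parameters as stored in the checkpoint.
            self.a = nn.Parameter((features, features), dtype="float32")
            self.b = nn.Parameter((features, features), dtype="float32")
            # Pre-processing: the weight actually used by `forward` is an
            # expression over the checkpoint parameters, not a parameter itself.
            self.weight = nn.op.concat([self.a, self.b], dim=0, name="ab_concat")

        def forward(self, x: nn.Tensor):
            return nn.op.matmul(x, nn.op.permute_dims(self.weight))

    mod, params = ConcatProj(8).export_tvm(
        spec={"forward": {"x": nn.spec.Tensor((1, 8), "float32")}}
    )
    # `params` still lists the checkpoint tensors `a` and `b`; the concat is
    # emitted in the function body, where LiftTransformParams can later hoist it.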
diff --git a/python/tvm/relax/frontend/nn/core.py b/python/tvm/relax/frontend/nn/core.py
index b7b3f411ed41..820acd235d8c 100644
--- a/python/tvm/relax/frontend/nn/core.py
+++ b/python/tvm/relax/frontend/nn/core.py
@@ -591,7 +591,22 @@ def wrap_nested(expr: rx.Expr, name: str) -> Union[Tensor, Sequence[Tensor]]:
         The computed result.
     """
     if not isinstance(expr, rx.DataflowVar):
-        expr = BlockBuilder.current().emit(expr, name)
+        block_builder = BlockBuilder.current()
+        if block_builder is None:
+            # Normalize to make sure we have valid StructInfo, but
+            # wait until we are actually building the function to
+            # flatten nested expressions.
+            #
+            # TODO(Lunderberg): Make this easier to call.  Inferring
+            # struct info for a nested expression should be doable in
+            # a free function, without requiring an active
+            # BlockBuilder and an active FunctionFrame.
+            builder = BlockBuilder()
+            with builder.function("dummy_scope", params=[]):
+                expr = builder.normalize(expr)
+                builder.emit_func_output([])
+        else:
+            expr = BlockBuilder.current().emit(expr, name)
     if isinstance(expr.struct_info_, TensorStructInfo):
         return Tensor(_expr=expr)
     if isinstance(expr.struct_info_, TupleStructInfo):
diff --git a/python/tvm/relax/frontend/nn/exporter.py b/python/tvm/relax/frontend/nn/exporter.py
index 1a7dcd6a648b..525d689f4995 100644
--- a/python/tvm/relax/frontend/nn/exporter.py
+++ b/python/tvm/relax/frontend/nn/exporter.py
@@ -111,7 +111,8 @@ def _effects() -> typing.List[typing.Tuple[str, core.Effect]]:
             return result

         # pylint: enable=protected-access
-        params = None
+
+        params = _params()
         effects = _effects()
         ext_mods = self.extern_mods
         with self:
@@ -121,7 +122,6 @@ def _effects() -> typing.List[typing.Tuple[str, core.Effect]]:
                     outputs = _emit_effect_init(self.builder, effects)
                 self.builder.emit_func_output(outputs, params=[])
             for method_name, method_spec in zip(spec.method_names, spec.method_specs):
-                params = _params()  # Re-initialize so symbolic shapes not shared across methods
                 len_args = len(method_spec.arg_specs)
                 len_effects = {
                     "packed": 1,
@@ -135,9 +135,18 @@ def _effects() -> typing.List[typing.Tuple[str, core.Effect]]:
                 with self.builder.dataflow():
                     outputs, inputs = _emit_method(self.builder, method_spec, params, effects)
                 self.builder.emit_func_output(outputs, inputs)
+
+                # TODO(Lunderberg): Make a `ir.transform.ConvertSSA`,
+                # similar to the existing `tir.transform.ConvertSSA`,
+                # that converts an entire module to SSA, including TIR
+                # variable definitions used in either TIR or Relax.
+                mod = self.builder.get()
+                mod[method_name] = rx.utils.copy_with_new_vars(mod[method_name])
         mod = self.builder.finalize()
         assert rx.analysis.well_formed(mod)
+
+        mod = rx.transform.CanonicalizeBindings()(mod)
         return mod, params, ext_mods
@@ -161,8 +170,6 @@ def _emit_method(  # pylint: disable=too-many-locals,too-many-branches,too-many-
     effects: typing.Optional[typing.List[typing.Tuple[str, core.Effect]]],
 ):
     # pylint: disable=protected-access
-    # symbolic shape's name mapping to its tir.Var for reuse
-    str2var_params: typing.Dict[str, tir.Var] = {}

     def _unwrap_ret(expr: typing.Any) -> typing.Any:
         if isinstance(expr, (core.Tensor, core.Object)):
@@ -176,35 +183,26 @@ def _unwrap_ret(expr: typing.Any) -> typing.Any:
     def _convert_input(arg):
         if isinstance(arg, tir.Var):
             return rx.Var(arg.name, struct_info=ShapeStructInfo(values=[arg]))
-        if isinstance(arg, (core.Tensor, core.Object)):
+        elif isinstance(arg, (core.Tensor, core.Object)):
             return arg._expr  # pylint: disable=protected-access
-        if isinstance(arg, _spec.Tuple):
+        elif isinstance(arg, _spec.Tuple):
             return rx.Var(
                 arg.name,
                 struct_info=TupleStructInfo(
                     [_convert_input(arg_i).struct_info for arg_i in arg.elements]
                 ),
             )
-        raise TypeError(f"Unsupported input type: {type(arg)}")
+        elif isinstance(arg, rx.Expr):
+            return arg
+        else:
+            raise TypeError(f"Unsupported input type: {type(arg)}")

     def _params(mode: str) -> typing.List[rx.Var]:
         inputs: typing.List[rx.Var] = []

-        def _get_var(shape_var: tir.Var) -> tir.Var:
-            name = shape_var.name
-            if name in str2var_params:
-                return str2var_params[name]
-            var = tir.Var(name, "int64")
-            str2var_params[name] = var
-            return var
-
         for name, param in params:
-            # Make sure the a symbolic shape is not re-registered (same as _method_spec_to_inputs)
-            # e.g. we do not see `vocab_size` for `lm_head` and `vocab_size_1` for `embed_tokens`
-            new_shape = [_get_var(x) if isinstance(x, tir.Var) else x for x in param.shape]
-            var = core.Tensor.placeholder(new_shape, param.dtype, name)._expr
-            inputs.append(var)
-            param._expr = var
+            inputs.append(param._expr)
+
         if mode == "none":
             return []
         if mode == "plain":
diff --git a/tests/python/relax/test_frontend_nn_exporter.py b/tests/python/relax/test_frontend_nn_exporter.py
new file mode 100644
index 000000000000..de8900238bb6
--- /dev/null
+++ b/tests/python/relax/test_frontend_nn_exporter.py
@@ -0,0 +1,443 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import tvm
+import tvm.testing
+
+from tvm import relax, tir
+from tvm.ir import assert_structural_equal
+from tvm.relax.frontend import nn
+from tvm.script import ir as I, relax as R, tir as T
+
+
+def test_simple():
+    """A module may be exported from nn.Module to Relax"""
+
+    slm_mod = nn.modules.ReLU()
+    exported_mod, _ = slm_mod.export_tvm(
+        spec={"forward": {"x": nn.spec.Tensor((3, 3), "float32")}},
+        debug=False,
+    )
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def forward(x: R.Tensor([3, 3], dtype="float32")):
+            R.func_attr({"num_input": 1})
+            with R.dataflow():
+                relu = R.nn.relu(x)
+                R.output(relu)
+            return relu
+
+    assert_structural_equal(exported_mod, Expected)
+
+
+def test_custom_module():
+    """A custom nn.Module subclass may be exported to Relax"""
+
+    class Before(nn.Module):
+        def forward(self, x: R.Tensor):
+            return nn.op.relu(x)
+
+    slm_mod = Before()
+    exported_mod, _ = slm_mod.export_tvm(
+        spec={"forward": {"x": nn.spec.Tensor((3, 3), "float32")}},
+        debug=False,
+    )
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def forward(x: R.Tensor([3, 3], dtype="float32")):
+            R.func_attr({"num_input": 1})
+            with R.dataflow():
+                relu = R.nn.relu(x)
+                R.output(relu)
+            return relu
+
+    assert_structural_equal(exported_mod, Expected)
+
+
+def test_debug_effect():
+    """Passing debug=True provides an argument for IO effect"""
+
+    slm_mod = nn.modules.ReLU()
+    exported_mod, _ = slm_mod.export_tvm(
+        spec={"forward": {"x": nn.spec.Tensor((3, 3), "float32")}},
+        debug=True,
+    )
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def forward(
+            x: R.Tensor([3, 3], dtype="float32"),
+            _io: R.Object,
+        ):
+            R.func_attr({"num_input": 2})
+            with R.dataflow():
+                relu = R.nn.relu(x)
+                output = relu, (_io,)
+                R.output(output)
+            return output
+
+        @R.function
+        def _initialize_effect():
+            with R.dataflow():
+                _io = R.null_value()
+                output = (_io,)
+                R.output(output)
+            return output
+
+    assert_structural_equal(exported_mod, Expected)
+
+
+def test_dynamic_shape():
+    """An argument may have a dynamic shape"""
+
+    slm_mod = nn.modules.ReLU()
+    exported_mod, _ = slm_mod.export_tvm(
+        spec={"forward": {"x": nn.spec.Tensor([tir.Var("batch_size", "int64"), 8], "float32")}},
+        debug=False,
+    )
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def forward(x: R.Tensor(["batch_size", 8], dtype="float32")):
+            R.func_attr({"num_input": 1})
+            with R.dataflow():
+                relu = R.nn.relu(x)
+                R.output(relu)
+            return relu
+
+    assert_structural_equal(exported_mod, Expected)
+
+
+def test_dynamic_shape_in_multiple_functions():
+    """A dynamic shape may be used in multiple functions"""
+
+    class Before(nn.Module):
+        def forward_relu(self, x: nn.Tensor):
+            return nn.op.relu(x)
+
+        def forward_silu(self, x: nn.Tensor):
+            return nn.op.silu(x)
+
+    slm_mod = Before()
+    exported_mod, _ = slm_mod.export_tvm(
+        spec={
+            "forward_relu": {"x": nn.spec.Tensor((tir.Var("batch_size", "int64"), 8), "float32")},
+            "forward_silu": {"x": nn.spec.Tensor((tir.Var("batch_size", "int64"), 8), "float32")},
+        },
+        debug=False,
+    )
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def forward_relu(x: R.Tensor(["batch_size", 8], dtype="float32")):
+            R.func_attr({"num_input": 1})
+            with R.dataflow():
+                relu = R.nn.relu(x)
+                R.output(relu)
+            return relu
+
+        @R.function
+        def forward_silu(x: R.Tensor(["batch_size", 8], dtype="float32")):
+            R.func_attr({"num_input": 1})
+            with R.dataflow():
+                silu = R.nn.silu(x)
+                R.output(silu)
+            return silu
+
+    assert_structural_equal(exported_mod, Expected)
+
+
+def test_export_nested_module():
+    """nn.Module instances may contain other nn.Module
+
+    When exporting to a Relax IRModule, all `nn.Parameter` instances
+    within the `nn.Module` become Relax function parameters.
+    """
+
+    class LlamaMLP(nn.Module):
+        def __init__(self, hidden_size: int, intermediate_size: int):
+            super().__init__()
+            self.gate_proj = nn.Linear(
+                in_features=hidden_size,
+                out_features=intermediate_size,
+                dtype="float16",
+                bias=False,
+            )
+            self.up_proj = nn.Linear(
+                in_features=hidden_size,
+                out_features=intermediate_size,
+                dtype="float16",
+                bias=False,
+            )
+            self.down_proj = nn.Linear(
+                intermediate_size,
+                hidden_size,
+                dtype="float16",
+                bias=False,
+            )
+
+        def forward(self, x: nn.Tensor):
+            gate = self.gate_proj(x)
+            up = self.up_proj(x)
+            return self.down_proj(nn.op.silu(gate) * up)
+
+    hidden_size = 4096
+    intermediate_size = 11008
+    slm_mod = LlamaMLP(hidden_size=hidden_size, intermediate_size=intermediate_size)
+    exported_mod, _ = slm_mod.export_tvm(
+        spec={
+            "forward": {
+                "x": nn.spec.Tensor((tir.Var("batch_size", "int64"), hidden_size), "float16")
+            },
+        },
+        debug=False,
+    )
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def forward(
+            x: R.Tensor(["batch_size", hidden_size], "float16"),
+            gate_proj_weights: R.Tensor([intermediate_size, hidden_size], "float16"),
+            up_proj_weights: R.Tensor([intermediate_size, hidden_size], "float16"),
+            down_proj_weights: R.Tensor([hidden_size, intermediate_size], "float16"),
+        ):
+            R.func_attr({"num_input": 1})
+            batch_size = T.int64()
+            with R.dataflow():
+                gate: R.Tensor([batch_size, intermediate_size]) = R.matmul(
+                    x, R.permute_dims(gate_proj_weights)
+                )
+                up: R.Tensor([batch_size, intermediate_size]) = R.matmul(
+                    x, R.permute_dims(up_proj_weights)
+                )
+                down: R.Tensor([batch_size, hidden_size]) = R.matmul(
+                    R.nn.silu(gate) * up, R.permute_dims(down_proj_weights)
+                )
+                R.output(down)
+            return down
+
+    assert_structural_equal(exported_mod, Expected)
+
+
+def test_generate_parameters():
+    """Weights may be expressions in terms of other parameters
+
+    Optimizations often require preprocessing of the model weights.
+
+    1. Declare the `nn.Module` members that contain the original model
+       weights.  These are used to define the parameter names when
+       reading from a PyTorch or Safetensors file.
+
+    2. Declare the `nn.Module` members whose `weight` field is defined
+       in terms of the un-optimized weights.  These members do not
+       generate any parameters in the Relax function.
+
+    3. Define the `forward` function in terms of the `nn.Module`
+       members that hold the updated weight tensors.
+
+    The exported Relax function accepts the original model parameters,
+    computes the pre-processed weights, and then performs computations
+    using the pre-processed weights.
+
+    In this example, the `LiftTransformParams` transform is applied
+    immediately, splitting the Relax function into a pre-processing
+    step and an execution step.  In practice, this transform would be
+    applied much later in an optimization pipeline, to allow optimized
+    compute kernels to be recognized.  For example, in some cases
+    `R.matmul(x, R.permute_dims(weight))` may be computed more
+    efficiently than `R.matmul(x, weight_transpose)`.  For this
+    reason, we do *not* apply `LiftTransformParams` as part of the
+    export from `nn.Module` to Relax.
+    """
+
+    class LlamaMLP(nn.Module):
+        def __init__(self, hidden_size: int, intermediate_size: int):
+            super().__init__()
+            # The nn.Linear layers for the original parameters are
+            # present in the model definition, and are still found
+            # when collecting a function's parameters.
+            self.gate_proj = nn.Linear(
+                in_features=hidden_size,
+                out_features=intermediate_size,
+                dtype="float16",
+                bias=False,
+            )
+            self.up_proj = nn.Linear(
+                in_features=hidden_size,
+                out_features=intermediate_size,
+                dtype="float16",
+                bias=False,
+            )
+            self.down_proj = nn.Linear(
+                intermediate_size,
+                hidden_size,
+                dtype="float16",
+                bias=False,
+            )
+
+            # At runtime, we'd like to have a single concatenated
+            # tensor containing both the gate and up projection
+            # weights.  We also want to use it in the `forward`
+            # function as if it owned its own weights.
+            self.gate_up_proj = nn.Linear(
+                in_features=hidden_size,
+                out_features=intermediate_size,
+                dtype="float16",
+                bias=False,
+            )
+
+            # The weight tensor of `gate_up_proj` can be overwritten
+            # with an expression in terms of the original `gate_proj`
+            # and `up_proj` tensors.
+            self.gate_up_proj.weight = nn.op.concat(
+                [self.gate_proj.weight, self.up_proj.weight], dim=0, name="gate_up_proj_weights"
+            )
+
+        def forward(self, x: nn.Tensor):
+            # Even though the `gate_up_proj` weights are defined as an
+            # expression rather than a `nn.Parameter`, the `forward`
+            # function does not require any special handling for it.
+            concat_gate_up = self.gate_up_proj(x)
+            gate, up = nn.op.split(concat_gate_up, 2, axis=-1)
+            return self.down_proj(nn.op.silu(gate) * up)
+
+    hidden_size = 4096
+    intermediate_size = 11008
+    slm_mod = LlamaMLP(hidden_size=hidden_size, intermediate_size=intermediate_size)
+    exported_mod, _ = slm_mod.export_tvm(
+        spec={
+            "forward": {
+                "x": nn.spec.Tensor((tir.Var("batch_size", "int64"), hidden_size), "float16")
+            },
+        },
+        debug=False,
+    )
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def forward(
+            x: R.Tensor(["batch_size", hidden_size], "float16"),
+            # The function's parameters are defined by the
+            # `nn.Parameter` instances, and still reference the
+            # original `gate_proj` and `up_proj` weights.  This
+            # maintains compatibility with named model weights in a
+            # PyTorch or Safetensors file.
+            gate_proj_weights: R.Tensor([intermediate_size, hidden_size], "float16"),
+            up_proj_weights: R.Tensor([intermediate_size, hidden_size], "float16"),
+            down_proj_weights: R.Tensor([hidden_size, intermediate_size], "float16"),
+        ):
+            R.func_attr({"num_input": 1})
+            batch_size = T.int64()
+            with R.dataflow():
+                # At this stage of compilation, the concatenation is
+                # written within the body of the function.  This will
+                # later be extracted into a pre-processing step using
+                # `relax.transform.LiftTransformParams`.
+                gate_up_proj_weights: R.Tensor(
+                    [intermediate_size * 2, hidden_size], "float16"
+                ) = R.concat([gate_proj_weights, up_proj_weights], axis=0)
+                gate_up: R.Tensor([batch_size, intermediate_size * 2], "float16") = R.matmul(
+                    x, R.permute_dims(gate_up_proj_weights)
+                )
+                gate_up_split = R.split(gate_up, 2, axis=-1)
+                gate = gate_up_split[0]
+                up = gate_up_split[1]
+                down: R.Tensor([batch_size, hidden_size], "float16") = R.matmul(
+                    R.nn.silu(gate) * up, R.permute_dims(down_proj_weights)
+                )
+                R.output(down)
+            return down
+
+    assert_structural_equal(exported_mod, Expected)
+
+    @I.ir_module
+    class ExpectedAfterLift:
+        @R.function
+        def forward(
+            x: R.Tensor(["batch_size", hidden_size], "float16"),
+            # After `relax.transform.LiftTransformParams`, the
+            # `gate_proj` and `up_proj` weights have been concatenated
+            # together.
+            gate_up_proj_weights_transpose: R.Tensor(
+                [hidden_size, intermediate_size * 2], "float16"
+            ),
+            down_proj_weights_transpose: R.Tensor([intermediate_size, hidden_size], "float16"),
+        ):
+            R.func_attr({"num_input": 1})
+            batch_size = T.int64()
+            with R.dataflow():
+                gate_up: R.Tensor([batch_size, intermediate_size * 2], "float16") = R.matmul(
+                    x, gate_up_proj_weights_transpose
+                )
+                gate_up_split = R.split(gate_up, 2, axis=-1)
+                gate = gate_up_split[0]
+                up = gate_up_split[1]
+                down: R.Tensor([batch_size, hidden_size], "float16") = R.matmul(
+                    R.nn.silu(gate) * up, down_proj_weights_transpose
+                )
+                R.output(down)
+            return down
+
+        @R.function
+        def transform_params(
+            model_params: R.Tuple(
+                R.Tensor([intermediate_size, hidden_size], "float16"),
+                R.Tensor([intermediate_size, hidden_size], "float16"),
+                R.Tensor([hidden_size, intermediate_size], "float16"),
+            )
+        ):
+            R.func_attr({"num_input": 0})
+            with R.dataflow():
+                gate_proj_weights: R.Tensor(
+                    [intermediate_size, hidden_size], "float16"
+                ) = model_params[0]
+                up_proj_weights: R.Tensor(
+                    [intermediate_size, hidden_size], "float16"
+                ) = model_params[1]
+                gate_up_proj_weights: R.Tensor(
+                    [intermediate_size * 2, hidden_size], "float16"
+                ) = R.concat([gate_proj_weights, up_proj_weights], axis=0)
+                gate_up_proj_weights_transpose: R.Tensor(
+                    [hidden_size, intermediate_size * 2], "float16"
+                ) = R.permute_dims(gate_up_proj_weights)
+                down_proj_weights: R.Tensor(
+                    [hidden_size, intermediate_size], "float16"
+                ) = model_params[2]
+                down_proj_weights_transpose: R.Tensor(
+                    [intermediate_size, hidden_size], "float16"
+                ) = R.permute_dims(down_proj_weights)
+                output = (gate_up_proj_weights_transpose, down_proj_weights_transpose)
+                R.output(output)
+            return output
+
+    lifted_mod = relax.transform.LiftTransformParams(shared_transform=True)(exported_mod)
+    assert_structural_equal(lifted_mod, ExpectedAfterLift)
+
+
+if __name__ == "__main__":
+    tvm.testing.main()
diff --git a/tests/python/relax/test_frontend_nn_extern_module.py b/tests/python/relax/test_frontend_nn_extern_module.py
index 6eaf1fbfc805..6ca774242274 100644
--- a/tests/python/relax/test_frontend_nn_extern_module.py
+++ b/tests/python/relax/test_frontend_nn_extern_module.py
@@ -94,9 +94,8 @@ def scalar_add(
             ext_scalar_add = R.call_dps_packed(
                 "ext_scalar_add", (a, b), out_sinfo=R.Tensor((), dtype="float32")
             )
-            gv: R.Tensor((), dtype="float32") = ext_scalar_add
-            R.output(gv)
-            return gv
+            R.output(ext_scalar_add)
+            return ext_scalar_add

         @R.function
         def test_sym(
@@ -110,9 +109,8 @@ def test_sym(
             ext_test_sym = R.call_dps_packed(
                 "ext_test_sym", (a, b), out_sinfo=R.Tensor((x, y, z, 9), dtype="float32")
             )
-            gv1: R.Tensor((x, y, z, 9), dtype="float32") = ext_test_sym
-            R.output(gv1)
-            return gv1
+            R.output(ext_test_sym)
+            return ext_test_sym

     tvm.ir.assert_structural_equal(ExpectedModule, mod)
diff --git a/tests/python/relax/test_frontend_nn_modules.py b/tests/python/relax/test_frontend_nn_modules.py
index 5ddc10505591..45128749e23d 100644
--- a/tests/python/relax/test_frontend_nn_modules.py
+++ b/tests/python/relax/test_frontend_nn_modules.py
@@ -493,8 +493,7 @@ def _initialize_effect() -> R.Tuple(R.Object, R.Object):
                 R.prim_value(0),
                 sinfo_args=[R.Object()],
             )
-            lv1 = _io, cache
-            gv = lv1
+            gv = _io, cache
             R.output(gv)
         return gv
diff --git a/tests/python/relax/test_frontend_nn_op.py b/tests/python/relax/test_frontend_nn_op.py
index 7d78e47c945b..68f86bba50e8 100644
--- a/tests/python/relax/test_frontend_nn_op.py
+++ b/tests/python/relax/test_frontend_nn_op.py
@@ -538,8 +538,7 @@ def add_one(A: T.Buffer((T.int64(10), T.int64(10)), "float32"), T_add: T.Buffer(
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv

@@ -611,8 +610,7 @@ def llama_fused_rope(var_qkv: T.handle, offset: T.int64, var_q: T.handle, var_k:
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv

@@ -699,8 +697,7 @@ def inplace_take(
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv

@@ -717,13 +714,12 @@ def test(
         R.func_attr({"num_input": 4})
         cls = Expected
         with R.dataflow():
-            lv1 = R.call_tir(
+            gv1 = R.call_tir(
                 cls.inplace_take,
                 (embedding_table, input_ids, embedding_dst),
                 out_sinfo=R.Tensor((total_seq_len, hidden_size), dtype),
                 tir_vars=R.shape([offset_1]),
             )
-            gv1: R.Tensor((total_seq_len, hidden_size), dtype) = lv1
             R.output(gv1)
         return gv1

@@ -772,8 +768,7 @@ def test(A: R.Tensor((16, 16), dtype="float32")) -> R.Tensor((16, 16), dtype="fl
         R.func_attr({"num_input": 1})
         cls = Expected
         with R.dataflow():
-            lv = R.call_tir(cls.tir_func, (A,), out_sinfo=R.Tensor((16, 16), dtype="float32"))
-            gv: R.Tensor((16, 16), dtype="float32") = lv
+            gv = R.call_tir(cls.tir_func, (A,), out_sinfo=R.Tensor((16, 16), dtype="float32"))
             R.output(gv)
         return gv

@@ -800,8 +795,7 @@ class Expected:
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv

@@ -888,8 +882,7 @@ def get_sample_index(A: T.handle, B: T.handle, C: T.handle, D: T.handle):
    def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv

@@ -1015,8 +1008,7 @@ def get_renorm_prob(A: T.handle, B: T.handle, C: T.handle, D: T.handle):
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv: R.Tuple(R.Object) = (_io,)
             R.output(gv)
         return gv

@@ -1130,8 +1122,7 @@ def get_renorm_cutoff(A: T.handle, B: T.handle, C: T.handle, D: T.handle, E: T.h
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv: R.Tuple(R.Object) = (_io,)
             R.output(gv)
         return gv
diff --git a/tests/python/relax/test_frontend_nn_packing.py b/tests/python/relax/test_frontend_nn_packing.py
index 56b614a807b8..c2cc22c17d40 100644
--- a/tests/python/relax/test_frontend_nn_packing.py
+++ b/tests/python/relax/test_frontend_nn_packing.py
@@ -59,8 +59,7 @@ def forward(
             matmul = R.matmul(x, matmul_1_weight)
             matmul_2_weight = R.permute_dims(linear_2_weight)
             matmul1 = R.matmul(x, matmul_2_weight)
-            add = R.add(matmul, matmul1)
-            gv = add
+            gv = R.add(matmul, matmul1)
             R.output(gv)
         return gv
diff --git a/tests/python/relax/test_frontend_nn_subroutines.py b/tests/python/relax/test_frontend_nn_subroutines.py
index 6bbf57aeadde..32ae967916a8 100644
--- a/tests/python/relax/test_frontend_nn_subroutines.py
+++ b/tests/python/relax/test_frontend_nn_subroutines.py
@@ -61,8 +61,7 @@ def forward(
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv

@@ -75,9 +74,8 @@ def layer(
         with R.dataflow():
             state = R.matmul(state, weights)
             state = Expected.activation(state)
-            dataflow_output = state
-            R.output(dataflow_output)
-            return dataflow_output
+            R.output(state)
+            return state

     @R.function(private=True)
     def activation(
@@ -85,9 +83,8 @@ def activation(
     ) -> R.Tensor(("batch_size", 32), dtype="float32"):
         with R.dataflow():
             state = R.nn.silu(state)
-            dataflow_output = state
-            R.output(dataflow_output)
-            return dataflow_output
+            R.output(state)
+            return state

     mod = Layer(64, 32)
     batch_size = tvm.tir.Var("batch_size", "int64")
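
Usage note (beyond this patch): once a module exported in this style reaches
the optimization pipeline, the weight pre-processing is split out with
`LiftTransformParams`, as exercised by `test_generate_parameters` above.  A
sketch under the same assumptions; the `PreTransposed` module is hypothetical:

    from tvm import relax
    from tvm.relax.frontend import nn

    class PreTransposed(nn.Module):
        """Hypothetical module whose runtime weight is the transposed parameter."""

        def __init__(self, n: int):
            self.weight = nn.Parameter((n, n), dtype="float32")
            # Derived weight: transposed once, ideally ahead of time.
            # Defining it here relies on the wrap_nested() change above,
            # which allows ops to be used without an active BlockBuilder.
            self.weight_t = nn.op.permute_dims(self.weight)

        def forward(self, x: nn.Tensor):
            return nn.op.matmul(x, self.weight_t)

    mod, _ = PreTransposed(4).export_tvm(
        spec={"forward": {"x": nn.spec.Tensor((1, 4), "float32")}}
    )

    # At export time the transpose is inline in `forward`.  Later in the
    # pipeline (not at export, for the kernel-recognition reasons given in
    # test_generate_parameters), split it out:
    mod = relax.transform.LiftTransformParams(shared_transform=True)(mod)
    # `mod` now holds a `transform_params` function, runnable once ahead of
    # time, and a `forward` that consumes the pre-transposed weight.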