From b757756ea71341bc2a20872153754e5932658130 Mon Sep 17 00:00:00 2001
From: "lcy.seso" <lcy.seso@gmail.com>
Date: Fri, 13 Sep 2024 05:23:39 +0000
Subject: [PATCH] benchmarks: write RNN baseline results to TSV and add default test sweeps

---
 .../tvm/bigbird_tvm.py                        |   2 +-
 .../rnn/baselines/grid_lstm/gridlstm_pt.py    |   8 +-
 .../rnn/baselines/grid_lstm/gridlstm_tf.py    | 201 ++++++++++++------
 .../baselines/grid_lstm/run_grid_lstm_pt.sh   |  36 ++--
 .../stacked_drnn_tensorflow_graph.py          |   9 +-
 .../baselines/stacked_dilated_rnn/utils.py    |   2 +-
 .../stacked_lstm_tensorflow_eager.py          |  27 ++-
 .../stacked_lstm_tensorflow_graph.py          |  26 +++
 8 files changed, 222 insertions(+), 89 deletions(-)

diff --git a/artifacts/FractalTensor/benchmarks/blocked_sparse_attention/tvm/bigbird_tvm.py b/artifacts/FractalTensor/benchmarks/blocked_sparse_attention/tvm/bigbird_tvm.py
index 5496d6568..5c7e9136f 100644
--- a/artifacts/FractalTensor/benchmarks/blocked_sparse_attention/tvm/bigbird_tvm.py
+++ b/artifacts/FractalTensor/benchmarks/blocked_sparse_attention/tvm/bigbird_tvm.py
@@ -245,7 +245,7 @@ def bigbird_wv_SPMM_reduce(N, L, B, H, W, R, G, dtype):
         verbose=2,
     )
 
-    # tuner.tune(tune_option)
+    tuner.tune(tune_option)
 
     funcs = []
 
diff --git a/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/gridlstm_pt.py b/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/gridlstm_pt.py
index 1996f60df..6f14e63e8 100644
--- a/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/gridlstm_pt.py
+++ b/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/gridlstm_pt.py
@@ -52,10 +52,10 @@ class PytorchGrid(unittest.TestCase):
     OUTPUT_FILE = cmd_args.output_file
     DEFAULT_TEST = cmd_args.default_test
 
-    if OUTPUT_FILE:
-        with open(OUTPUT_FILE, 'w') as fout:
-            fout.write(
-                "depth\t[seq_length, batch_size, hidden_size]\tPyTorch(ms)\n")
+    # if OUTPUT_FILE:
+    #     with open(OUTPUT_FILE, 'w') as fout:
+    #         fout.write(
+    #             "depth\t[seq_length, batch_size, hidden_size]\tPyTorch(ms)\n")
 
     LOG_DEBUG_INFO = 1
     PROFILER_ENABLE = 0
diff --git a/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/gridlstm_tf.py b/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/gridlstm_tf.py
index 0454d2b51..cc0a81201 100644
--- a/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/gridlstm_tf.py
+++ b/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/gridlstm_tf.py
@@ -4,6 +4,7 @@
 import unittest
 import os
 import logging
+import argparse
 import datetime
 
 import test_utils as tu
@@ -17,6 +18,17 @@
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 
 
+def str2bool(v):
+    """Map 'True'/'False' (or a bool) to bool; reject anything else."""
+    if isinstance(v, bool):
+        return v
+    if v == 'True':
+        return True
+    if v == 'False':
+        return False
+    raise argparse.ArgumentTypeError('Boolean value expected.')
+
+
 def parse_test_args():
     parser = argparse.ArgumentParser(description='Girdlstm')
     parser.add_argument(
@@ -26,67 +38,72 @@ def parse_test_args():
     parser.add_argument(
         '--hidden_size', type=int, help='Hidden size', default=256)
     parser.add_argument('--depth', type=int, help='Depth size', default=4)
+    parser.add_argument(
+        '--output_file', type=str, help='Output file path', default=None)
+    parser.add_argument(
+        '--default_test',
+        type=str2bool,
+        help='Whether to run the default test',
+        default=False)
     return parser.parse_args()
 
 class TFGraphGridLSTM(unittest.TestCase):
     WARM_UP = 5
-    ITERS = 10
+    ITERS = 1
 
     cmd_args = parse_test_args()
     SEQ_LEN = cmd_args.seq_len
     BATCH_SIZE = cmd_args.batch_size
-    HIDDEN_SIZE = cmd_args.hidden_size
-    DEPTH = cmd_args.depth
+    HIDDEN = cmd_args.hidden_size
+    NUM_LAYERS = cmd_args.depth
+    OUTPUT_FILE = cmd_args.output_file
+    DEFAULT_TEST = cmd_args.default_test
+
+    # if OUTPUT_FILE:
+    #     with open(OUTPUT_FILE, 'w') as fout:
+    #         fout.write(
+    #             "depth\t[seq_length, batch_size, hidden_size]\tTensorFlow(ms)\n")
 
     LOG_DEBUG_INFO = 1
     PROFILER_ENABLE = 0
 
     def setUp(self):
         tf.compat.v2.random.set_seed(1234)
-        self._init_logger()
 
         self.stddev = 1.0 / math.sqrt(TFGraphGridLSTM.HIDDEN)
         self.shape = (TFGraphGridLSTM.SEQ_LEN, TFGraphGridLSTM.BATCH_SIZE,
                       TFGraphGridLSTM.HIDDEN)
 
-    def _init_logger(self):
-        self.logger = logging.getLogger()
-        logging.basicConfig(
-            level=(logging.DEBUG
-                   if TFGraphGridLSTM.LOG_DEBUG_INFO else logging.INFO),
-            filename="grid_lstm_results_tensorflow_graph.txt",
-            filemode="w",
-            format="%(message)s")
-
-    def _report(self, test_name, start):
-        """
-        Args:
-            test_name (String): Name of the test.
-            start (String): Timestamp of the start time.
-        """
+    def _report(self, test_name, test_case, start):
+        seq_len, batch_size, hidden, num_layers = test_case
         elapsed_time = time.time() - start
         average_time = elapsed_time / TFGraphGridLSTM.ITERS
-        seq_per_sec = (
-            TFGraphGridLSTM.ITERS * TFGraphGridLSTM.BATCH_SIZE) / elapsed_time
-        self.logger.info(("|%s|%.4f\t|%.4f\t|%.4f|") %
-                         (test_name, average_time, elapsed_time, seq_per_sec))
-        print((
-            "|test_name = %s|average_time = %.4f s|elapsed_time = %.4f s|seq_per_sec = %.4f|"
-        ) % (test_name, average_time, elapsed_time, seq_per_sec))
-
-    def _apply_forward(self, dev, test_name, model):
+
+        print(f"\nbench-grid\tdepth\t{num_layers}\tseq_length\t{seq_len}\t"
+              f"batch_size\t{batch_size}\t"
+              f"hidden_size\t{hidden}\tTensorFlow(ms)\t{average_time * 1000}")
+
+        if self.OUTPUT_FILE:
+            with open(self.OUTPUT_FILE, 'a') as fout:
+                fout.write(
+                    f"{num_layers}\t[{seq_len}, {seq_len}, {batch_size}, {hidden}]\t"
+                    f"{average_time * 1000}\n")
+
+    def _apply_forward(self, dev, test_name, test_case, model):
         """Only Test the forward computation.
         Args:
             dev, String: Device that on which the test is running. cpu or gpu.
             test_name, String: Name of the test.
             model, Callable: The tested model. It should be a callable object.
         """
-
+        seq_len, batch_size, hidden, num_layers = test_case
+        shape = (seq_len, batch_size, hidden)
+        stddev = 1.0 / math.sqrt(hidden)
         with tf.device(tu.device(dev)):
             source = tf.random.uniform(
-                self.shape, minval=-self.stddev, maxval=self.stddev)
+                shape, minval=-stddev, maxval=stddev)
             target = tf.random.uniform(
-                self.shape, minval=-self.stddev, maxval=self.stddev)
+                shape, minval=-stddev, maxval=stddev)
 
             output = model(source, target)
 
@@ -108,41 +125,97 @@ def _apply_forward(self, dev, test_name, model):
                 if TFGraphGridLSTM.PROFILER_ENABLE:
                     tf.profiler.experimental.stop()
 
-            self._report(test_name, start)
+            self._report(test_name, test_case, start)
 
     def test_fine_grained_op_lstm_forward(self):
-        for device in [
-                "cpu",
-                "gpu",
-        ]:
-            model = FineGrainedOpGridLSTMNet(
-                TFGraphGridLSTM.NUM_LAYERS, TFGraphGridLSTM.SEQ_LEN,
-                TFGraphGridLSTM.SEQ_LEN, TFGraphGridLSTM.BATCH_SIZE,
-                TFGraphGridLSTM.HIDDEN)
-            self._apply_forward(
-                device, f"graph_finegrained_op_lstm_{device}_forward", model)
-
-    def test_while_op_lstm_forward(self):
-        for device in [
-                "cpu",
-                "gpu",
-        ]:
-            model = WhileOpGridLSTMNet(
-                TFGraphGridLSTM.NUM_LAYERS, TFGraphGridLSTM.SEQ_LEN,
-                TFGraphGridLSTM.SEQ_LEN, TFGraphGridLSTM.BATCH_SIZE,
-                TFGraphGridLSTM.HIDDEN)
-            self._apply_forward(device,
-                                f"graph_while_op_lstm_{device}_forward", model)
-
-    def test_base_while_op_lstm_forward(self):
-        for device in [
-                "cpu",
-                "gpu",
-        ]:
-            model = BaseWhileOpGridLSTMNet(TFGraphGridLSTM.HIDDEN)
-            self._apply_forward(
-                device, f"graph_base_while_op_lstm_{device}_forward", model)
-
+        # Single run configured from the command line; the built-in sweep
+        # lives in test_default_data and is selected by DEFAULT_TEST.
+        if self.DEFAULT_TEST:
+            return
+
+        cfg = TFGraphGridLSTM
+        # Same field order that _apply_forward and _report unpack.
+        test_case = [cfg.SEQ_LEN, cfg.BATCH_SIZE, cfg.HIDDEN, cfg.NUM_LAYERS]
+        for device in ("gpu", ):  # "cpu" is currently disabled
+            model = FineGrainedOpGridLSTMNet(cfg.NUM_LAYERS, cfg.SEQ_LEN,
+                                             cfg.SEQ_LEN, cfg.BATCH_SIZE,
+                                             cfg.HIDDEN)
+            self._apply_forward(
+                device, f"graph_finegrained_op_lstm_{device}_forward",
+                test_case, model)
+
+    # def test_while_op_lstm_forward(self):
+    #     if not self.DEFAULT_TEST:
+    #         for device in [
+    #                 # "cpu",
+    #                 "gpu",
+    #         ]:
+    #             model = WhileOpGridLSTMNet(
+    #                 TFGraphGridLSTM.NUM_LAYERS, TFGraphGridLSTM.SEQ_LEN,
+    #                 TFGraphGridLSTM.SEQ_LEN, TFGraphGridLSTM.BATCH_SIZE,
+    #                 TFGraphGridLSTM.HIDDEN)
+    #             test_case = [
+    #                     TFGraphGridLSTM.SEQ_LEN, TFGraphGridLSTM.BATCH_SIZE,
+    #                     TFGraphGridLSTM.HIDDEN, TFGraphGridLSTM.NUM_LAYERS
+    #                 ]
+    #             self._apply_forward(device,
+    #                                 f"graph_while_op_lstm_{device}_forward", test_case, model)
+
+    # def test_base_while_op_lstm_forward(self):
+    #     for device in [
+    #             # "cpu",
+    #             "gpu",
+    #     ]:
+    #         model = BaseWhileOpGridLSTMNet(TFGraphGridLSTM.HIDDEN)
+    #         self._apply_forward(
+    #             device, f"graph_base_while_op_lstm_{device}_forward", model)
+
+    def test_default_data(self):
+        # Sweep the built-in configurations on GPU. This only runs when
+        # DEFAULT_TEST is set; otherwise the single command-line
+        # configuration is benchmarked by the other test method.
+        if not self.DEFAULT_TEST:
+            return
+
+        test_name = "gridlstm_gpu_forward_TensorFlow"
+        print("default test:", test_name)
+
+        test_cases = [
+            # overall
+            # [seq_len, batch_size, hidden, num_layers]
+            [10, 32, 256, 32],
+            [10, 32, 512, 32],
+            [10, 32, 1024, 32],
+
+            # scale with depth
+            [10, 32, 256, 1],
+            [10, 32, 256, 2],
+            [10, 32, 256, 4],
+            [10, 32, 256, 8],
+            [10, 32, 256, 16],
+            [10, 32, 256, 32],
+            [10, 32, 1024, 1],
+            [10, 32, 1024, 2],
+            [10, 32, 1024, 4],
+            [10, 32, 1024, 8],
+            [10, 32, 1024, 16],
+            [10, 32, 1024, 32],
+
+            # scale with length
+            [5, 32, 256, 32],
+            [7, 32, 256, 32],
+            [10, 32, 256, 32],
+            [5, 32, 1024, 32],
+            [7, 32, 1024, 32],
+            [10, 32, 1024, 32],
+        ]
+
+        for test_case in test_cases:
+            seq_len, batch_size, hidden, num_layers = test_case
+            # seq_len is passed as both the 2nd and 3rd constructor argument.
+            model = FineGrainedOpGridLSTMNet(num_layers, seq_len, seq_len,
+                                             batch_size, hidden)
+            self._apply_forward('gpu', test_name, test_case, model)
 
 if __name__ == "__main__":
     tf.compat.v1.disable_eager_execution()
diff --git a/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/run_grid_lstm_pt.sh b/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/run_grid_lstm_pt.sh
index c110c9541..62f4987d3 100755
--- a/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/run_grid_lstm_pt.sh
+++ b/artifacts/FractalTensor/benchmarks/rnn/baselines/grid_lstm/run_grid_lstm_pt.sh
@@ -3,13 +3,18 @@
 seq_len=10
 batch_size=32
 
+root_dir=$(pwd)
+log_dir="$root_dir/../../../../../logs"
+mkdir -p "$log_dir"  # the redirect below fails if the directory is missing
+echo $'depth\t[seq_length, batch_size, hidden_size]\tPyTorch(ms)' > "$log_dir/pt_grid_lstm.tsv"
+
 # overall
 hiddens='256 512 1024'
 for hidden in $hiddens; do
-    python3 gridlstm_pt.py --seq_len=$seq_len \
-        --batch_size=$batch_size \
-        --hidden_size=$hidden \
-        --depth=32
+    python3 gridlstm_pt.py --seq_len $seq_len \
+        --batch_size $batch_size \
+        --hidden_size $hidden \
+        --depth 32 --output_file $log_dir/pt_grid_lstm.tsv
 done
 
 # scale with depth
@@ -17,21 +22,22 @@ depths='1 2 4 8 16 32'
 hiddens='256 1024'
 for hidden in $hiddens; do
     for depth in $depths; do
-        python3 gridlstm_pt.py --seq_len=$seq_len \
-            --batch_size=$batch_size \
-            --hidden_size=$hidden \
-            --depth=$depth
+        python3 gridlstm_pt.py --seq_len $seq_len \
+            --batch_size $batch_size \
+            --hidden_size $hidden \
+            --depth $depth --output_file $log_dir/pt_grid_lstm.tsv
     done
 done
 
 # scale with length
 lengths='5 7 10'
 hiddens='256 1024'
-for length in $lengths; do
-    for hidden in $hiddens; do
-        python3 gridlstm_pt.py --seq_len=$seq_len \
-            --batch_size=32 \
-            --hidden_size=$hidden \
-            --depth=32
+
+for hidden in $hiddens; do
+    for length in $lengths; do
+        python3 gridlstm_pt.py --seq_len $length \
+            --batch_size 32 \
+            --hidden_size $hidden \
+            --depth 32 --output_file $log_dir/pt_grid_lstm.tsv
     done
-done
+done
\ No newline at end of file
diff --git a/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_dilated_rnn/stacked_drnn_tensorflow_graph.py b/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_dilated_rnn/stacked_drnn_tensorflow_graph.py
index 1cd08d2d3..09c87fb73 100644
--- a/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_dilated_rnn/stacked_drnn_tensorflow_graph.py
+++ b/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_dilated_rnn/stacked_drnn_tensorflow_graph.py
@@ -18,7 +18,6 @@ def setUp(self):
         self.stddev = 1.0 / math.sqrt(HIDDEN_SIZE)
 
         self.log_dir = ''
-        self.logger = init_logger(self.log_dir, 'tensorflow_drnn_graph.txt')
 
     def test_drnn_forward(self):
         shape = (SEQ_LEN, BATCH_SIZE, INPUT_SIZE)
@@ -29,7 +28,7 @@ def test_drnn_forward(self):
 
         with tf.compat.v1.Session() as sess:
             for device in [
-                    'cpu',
+                    # 'cpu',
                     '/device:GPU:0',
             ]:
                 with tf.device(device):
@@ -70,8 +69,12 @@ def test_drnn_forward(self):
                                 inputs: x_data,
                                 pads: padding_data
                             })
+                    elapsed_time = time() - start
+                    average_time = elapsed_time / ITERS
                     test_name = f'TensorFlow_Stacked_DLSTM_graph_{device}'
-                    report(test_name, start, self.logger)
+                    test_case = [SEQ_LEN, BATCH_SIZE, HIDDEN_SIZE, NUM_LAYERS]
+                    report(test_name, test_case, OUTPUT_FILE, average_time * 1000)
+                    # report(test_name, start, self.logger)
 
 
 if __name__ == '__main__':
diff --git a/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_dilated_rnn/utils.py b/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_dilated_rnn/utils.py
index 3e89519d9..b5a60b079 100644
--- a/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_dilated_rnn/utils.py
+++ b/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_dilated_rnn/utils.py
@@ -58,7 +58,7 @@ def parse_test_args():
 if not DEFAULT_TEST:
     DILATION = DILATION[0:NUM_LAYERS]
 
-ITERS = 10
+ITERS = 1
 WARMUP = 5
 LOG_DEBUG_INFO = 0
 
diff --git a/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_lstm/stacked_lstm_tensorflow_eager.py b/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_lstm/stacked_lstm_tensorflow_eager.py
index 54bb47a2b..dfa3e974f 100644
--- a/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_lstm/stacked_lstm_tensorflow_eager.py
+++ b/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_lstm/stacked_lstm_tensorflow_eager.py
@@ -175,6 +175,32 @@ def build_model(test_case):
                 return GraphModeModel
 
             test_cases = [
+                # overall
+                # [seq_len, batch_size, hidden, num_layers]
+                [64, 256, 256, 32],
+                [64, 256, 512, 32],
+                [64, 256, 1024, 32],
+                # scale with depth
+                [64, 256, 256, 1],
+                [64, 256, 256, 2],
+                [64, 256, 256, 4],
+                [64, 256, 256, 8],
+                [64, 256, 256, 16],
+                [64, 256, 256, 32],
+                [64, 256, 1024, 1],
+                [64, 256, 1024, 2],
+                [64, 256, 1024, 4],
+                [64, 256, 1024, 8],
+                [64, 256, 1024, 16],
+                [64, 256, 1024, 32],
+                # scale with length
+                [32, 256, 256, 32],
+                [64, 256, 256, 32],
+                [128, 256, 256, 32],
+                [32, 256, 1024, 32],
+                [64, 256, 1024, 32],
+                [128, 256, 1024, 32],
+                # figure 2
                 [64, 256, 256, 1],
                 [64, 256, 256, 4],
                 [64, 256, 256, 8],
@@ -182,7 +208,6 @@ def build_model(test_case):
                 [64, 256, 256, 16],
                 [64, 256, 256, 20],
             ]
-
             if self.OUTPUT_FILE:
                 with open(self.OUTPUT_FILE, 'w') as fout:
                     fout.write(
diff --git a/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_lstm/stacked_lstm_tensorflow_graph.py b/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_lstm/stacked_lstm_tensorflow_graph.py
index 54fd0d5f2..cd947e00b 100644
--- a/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_lstm/stacked_lstm_tensorflow_graph.py
+++ b/artifacts/FractalTensor/benchmarks/rnn/baselines/stacked_lstm/stacked_lstm_tensorflow_graph.py
@@ -195,6 +195,32 @@ def build_model2(test_case):
                 return GraphModeModel
 
             test_cases = [
+                # overall
+                # [seq_len, batch_size, hidden, num_layers]
+                [64, 256, 256, 32],
+                [64, 256, 512, 32],
+                [64, 256, 1024, 32],
+                # scale with depth
+                [64, 256, 256, 1],
+                [64, 256, 256, 2],
+                [64, 256, 256, 4],
+                [64, 256, 256, 8],
+                [64, 256, 256, 16],
+                [64, 256, 256, 32],
+                [64, 256, 1024, 1],
+                [64, 256, 1024, 2],
+                [64, 256, 1024, 4],
+                [64, 256, 1024, 8],
+                [64, 256, 1024, 16],
+                [64, 256, 1024, 32],
+                # scale with length
+                [32, 256, 256, 32],
+                [64, 256, 256, 32],
+                [128, 256, 256, 32],
+                [32, 256, 1024, 32],
+                [64, 256, 1024, 32],
+                [128, 256, 1024, 32],
+                # figure 2
                 [64, 256, 256, 1],
                 [64, 256, 256, 4],
                 [64, 256, 256, 8],