
Add node regression in benchmarking code #399

Open · wants to merge 6 commits into base: main
60 changes: 60 additions & 0 deletions benchmarks/NodeRegression/README.md
@@ -0,0 +1,60 @@
# GLI Benchmarking on the `NodeRegression` Task

The code in this folder can be used to benchmark popular models on the `NodeRegression` task.

## How to run

Example commands to run the code:

```bash
# full batch
python train.py --dataset <dataset> --model GCN
python train.py --dataset <dataset> --model MLP
python train.py --dataset <dataset> --model GAT --model-cfg configs/GAT.yaml
python train.py --dataset <dataset> --model GraphSAGE --model-cfg configs/GraphSAGE.yaml
python train.py --dataset <dataset> --model MoNet --model-cfg configs/MoNet.yaml
python train.py --dataset <dataset> --model MixHop --model-cfg configs/MixHop.yaml
python train.py --dataset <dataset> --model LINKX --model-cfg configs/LINKX.yaml --train-cfg configs/LINKX_train.yaml

# mini batch
python train_minibatch.py --dataset <dataset> --model GCN_minibatch

# GBDT
python train_gbdt.py --dataset <dataset> --model lightgbm
python train_gbdt.py --dataset <dataset> --model catboost
```

One can provide a `yaml` file to the `--model-cfg` or `--train-cfg` argument to specify the model configuration or the training configuration, respectively. If not provided, the default configurations (see [model_default.yaml](https://github.com/Graph-Learning-Benchmarks/gli/blob/main/benchmarks/NodeRegression/configs/model_default.yaml) and [train_default.yaml](https://github.com/Graph-Learning-Benchmarks/gli/blob/main/benchmarks/NodeRegression/configs/train_default.yaml)) will be used.

Note that some models may have unique hyperparameters that are not included in the default configuration files. In this case, one should pass the model-specific configuration file to `train.py`, as in the `GAT` and `LINKX` examples above.
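
For reference, `config_gen.py` in this PR registers the same two flags with these defaults; below is a sketch of how `train.py` likely picks them up (illustrative only; the actual script may differ):

```python
# Illustrative sketch, mirroring the argparse setup in config_gen.py.
# `load_config_file` is the helper that config_gen.py imports from utils.
import argparse

from utils import load_config_file

parser = argparse.ArgumentParser()
parser.add_argument("--model-cfg", type=str,
                    default="configs/model_default.yaml",
                    help="The model configuration file path.")
parser.add_argument("--train-cfg", type=str,
                    default="configs/train_default.yaml",
                    help="The training configuration file path.")
args = parser.parse_args()

model_cfg = load_config_file(args.model_cfg)  # dict of model hyperparameters
train_cfg = load_config_file(args.train_cfg)  # dict of training settings
```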

## Supported models

The following models are supported by this benchmark.

### Full batch

- `GCN`
- `MLP`
- `GAT`
- `GraphSAGE`
- `MoNet`
- `MixHop`
- `LINKX`

### Mini batch

- `GCN_minibatch`

### Gradient Boosting Decision Tree (GBDT)

- `catboost`
- `lightgbm`

To add a new model, one should add the model implementation under the `models` folder and, when needed, model-specific configurations under the `configs` folder; see the sketch below for the general shape. We have tried to implement `train.py` in a generic way, so one may only need to make minimal modifications to `train.py` and `utils.py`.
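
As a rough illustration (not part of this PR, and the constructor signature is only an assumption styled after `models/gat.py`), a new full-batch model is an `nn.Module` whose `forward` returns one prediction per node:

```python
# Hypothetical skeleton for a new model under models/.
from torch import nn


class MyModel(nn.Module):
    """Placeholder two-layer feed-forward model."""

    def __init__(self, in_dim, num_hidden, out_dim, dropout):
        """Initiate model."""
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_dim, num_hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(num_hidden, out_dim),
        )

    def forward(self, features):
        """Return one regression value per node (e.g., out_dim = 1)."""
        return self.layers(features)
```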

Contributions of new models are welcome through pull requests.

## Supported datasets

No `NodeRegression` datasets are available yet.
Contributor:

I'll add a regression dataset in the next couple of days.

If the regression implementation already seems to work well, could you add a test for NodeRegression training in another PR? Maybe let's first add the test, then I'll add a regression dataset, and let's merge this PR after I add the dataset.

Collaborator (Author):

Of course, I will open another PR for this test.

75 changes: 75 additions & 0 deletions benchmarks/NodeRegression/config_gen.py
@@ -0,0 +1,75 @@
"""
Random search.

References:
https://github.com/pyg-team/pytorch_geometric/blob/master/graphgym/configs_gen.py
https://github.com/pyg-team/pytorch_geometric/blob/master/torch_geometric/graphgym/utils/io.py
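
Example invocation (flags as defined in parse_args below):
    python config_gen.py --grid grid/grid_example.yaml --model GCN \
        --sample_num 10 --trial_num 5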
"""

import argparse
import time
from random import randint

import yaml

from utils import load_config_file, makedirs_rm_exist

train_cfg_list = ["self_loop", "to_dense", "lr", "weight_decay", "num_trials",
                  "max_epoch", "early_stopping"]


def parse_args():
    """Parse the arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-cfg", type=str,
                        default="configs/model_default.yaml",
                        help="The model configuration file path.")
    parser.add_argument("--train-cfg", type=str,
                        default="configs/train_default.yaml",
                        help="The training configuration file path.")
    parser.add_argument("--grid", type=str,
                        default="grid/grid_example.yaml",
                        help="Configuration file for grid search.")
    parser.add_argument("--sample_num", dest="sample_num",
                        default=10, type=int,
                        help="Number of random samples in the space.")
    parser.add_argument("--trial_num", type=int, default=5,
                        help="Number of trials for the same configuration.")
    parser.add_argument("--model", type=str, default="GCN",
                        help="Model to be used: GCN, GAT, MoNet, "
                             "GraphSAGE, MLP for now.")
    return parser.parse_args()


def grid_gen(args, gen_cfg, model_cfg, train_cfg):
    """Generate random search configuration files."""
    dir_name = "./grid/" + args.model + time.strftime("_%Y%m%d_%H%M%S")
    makedirs_rm_exist(dir_name)
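    # For each random sample, draw one value per hyperparameter; keys listed
    # in train_cfg_list go into the training config, the rest into the model
    # config. Each sample is then written out args.trial_num times, each
    # time with a fresh random seed.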
    for i in range(args.sample_num):
        for key in gen_cfg:
            key_len = len(gen_cfg[key])
            if key in train_cfg_list:
                train_cfg[key] = gen_cfg[key][randint(0, key_len - 1)]
            else:
                # otherwise, the key is for the model
                model_cfg[key] = gen_cfg[key][randint(0, key_len - 1)]
        for j in range(args.trial_num):
            # the i-th configuration, j-th trial
            index_str = str(i) + "_" + str(j)
            train_cfg_name = args.model + "_train_" + index_str + ".yaml"
            model_cfg_name = args.model + "_model_" + index_str + ".yaml"
            train_cfg["seed"] = randint(1, 10000)
            with open(dir_name + "/" + train_cfg_name,
                      "w", encoding="utf-8") as f:
                yaml.dump(train_cfg, f, default_flow_style=False)
            with open(dir_name + "/" + model_cfg_name,
                      "w", encoding="utf-8") as f:
                yaml.dump(model_cfg, f, default_flow_style=False)


if __name__ == "__main__":
    Args = parse_args()
    Gen_cfg = load_config_file(Args.grid)
    # load default configurations for training and model
    Model_cfg = load_config_file(Args.model_cfg)
    Train_cfg = load_config_file(Args.train_cfg)
    grid_gen(Args, Gen_cfg, Model_cfg, Train_cfg)
7 changes: 7 additions & 0 deletions benchmarks/NodeRegression/configs/GAT.yaml
@@ -0,0 +1,7 @@
num_layers: 2
num_hidden: 8
num_heads: 8
num_out_heads: 2
residual: False
dropout: .6
negative_slope: .2
4 changes: 4 additions & 0 deletions benchmarks/NodeRegression/configs/GraphSAGE.yaml
@@ -0,0 +1,4 @@
num_layers: 2
num_hidden: 8
dropout: .6
aggregator_type: gcn
7 changes: 7 additions & 0 deletions benchmarks/NodeRegression/configs/LINKX.yaml
@@ -0,0 +1,7 @@
num_hidden: 16
num_layers: 1
dropout: .5
inner_activation: False
inner_dropout: False
init_layers_A: 1
init_layers_X: 1
11 changes: 11 additions & 0 deletions benchmarks/NodeRegression/configs/LINKX_train.yaml
@@ -0,0 +1,11 @@
loss_fcn: mse
self_loop: False
to_dense: False
lr: .01
weight_decay: 0.001
max_epoch: 10000
early_stopping: True
seed: 0
batch_size: 256
to_undirected: False
optimizer: "AdamW"
6 changes: 6 additions & 0 deletions benchmarks/NodeRegression/configs/MixHop.yaml
@@ -0,0 +1,6 @@
num_hidden: 8
p: [0, 1, 2]
num_layers: 2
dropout: .5
layer_dropout: 0.9
batchnorm: False
5 changes: 5 additions & 0 deletions benchmarks/NodeRegression/configs/MoNet.yaml
@@ -0,0 +1,5 @@
num_layers: 2
num_hidden: 8
dropout: .6
pseudo_dim: 2
num_kernels: 3
12 changes: 12 additions & 0 deletions benchmarks/NodeRegression/configs/catboost.yaml
@@ -0,0 +1,12 @@
hp:
  lr:
    - 0.01
    - 0.1
  depth:
    - 4
    - 6
  l2_leaf_reg:
    - null
num_epochs: 1000
patience: 100
verbose: false
14 changes: 14 additions & 0 deletions benchmarks/NodeRegression/configs/lightgbm.yaml
@@ -0,0 +1,14 @@
hp:
  lr:
    - 0.01
    - 0.1
  num_leaves:
    - 15
    - 63
  lambda_l2:
    - 0.0
  boosting:
    - gbdt
num_epochs: 1000
patience: 100

3 changes: 3 additions & 0 deletions benchmarks/NodeRegression/configs/model_default.yaml
@@ -0,0 +1,3 @@
num_layers: 2
num_hidden: 8
dropout: .6
11 changes: 11 additions & 0 deletions benchmarks/NodeRegression/configs/train_default.yaml
@@ -0,0 +1,11 @@
loss_fcn: mse
self_loop: True
to_dense: False
lr: .01
weight_decay: 0.001
max_epoch: 10000
early_stopping: True
seed: 0
batch_size: 256
to_undirected: False
optimizer: "Adam"
4 changes: 4 additions & 0 deletions benchmarks/NodeRegression/grid/grid_example.yaml
@@ -0,0 +1,4 @@
num_hidden: [32, 64]
lr: [0.001, 0.005, 0.01, .1]
dropout: [0.2, 0.4, 0.6, 0.8]
weight_decay: [.0001, .001, .01, .1]
57 changes: 57 additions & 0 deletions benchmarks/NodeRegression/models/gat.py
@@ -0,0 +1,57 @@
"""
GAT model in GLI.

References:
https://github.com/dmlc/dgl/tree/master/examples/pytorch/gat
"""

from torch import nn
from dgl.nn import GATConv


class GAT(nn.Module):
    """GAT network."""

    def __init__(self,
                 g,
                 num_layers,
                 in_dim,
                 num_hidden,
                 num_classes,
                 heads,
                 activation,
                 feat_drop,
                 attn_drop,
                 negative_slope,
                 residual):
        """Initiate model."""
        super().__init__()
        self.g = g
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation
        # heads is expected to hold one entry per layer: the number of
        # attention heads for each hidden layer plus the output heads
        # (cf. num_heads and num_out_heads in configs/GAT.yaml)

        # input projection (no residual)
        self.gat_layers.append(GATConv(
            in_dim, num_hidden, heads[0],
            feat_drop, attn_drop, negative_slope, False, self.activation))
        # hidden layers; range(1, num_layers - 1) so that the total layer
        # count (input + hidden + output) equals num_layers, matching the
        # loop in forward
        for layer in range(1, num_layers - 1):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(GATConv(num_hidden * heads[layer - 1],
                                           num_hidden, heads[layer],
                                           feat_drop, attn_drop,
                                           negative_slope, residual,
                                           self.activation))
        # output projection
        self.gat_layers.append(GATConv(
            num_hidden * heads[-2], num_classes, heads[-1],
            feat_drop, attn_drop, negative_slope, residual, None))

    def forward(self, inputs):
        """Forward."""
        h = inputs
        for layer in range(self.num_layers):
            h = self.gat_layers[layer](self.g, h)
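            # concatenate the attention heads on hidden layers and average
            # them on the output layer (the convention used by the DGL GAT
            # example this file is based on)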
            h = h.flatten(1) if layer != self.num_layers - 1 else h.mean(1)
        return h