krishnanlab · RemyLau · Jul 11, 2023 · Jul 11, 2023 · Jul 11, 2023 · Jul 11, 2023
diff --git a/README.md b/README.md
@@ -104,8 +104,9 @@ Training and evaluation of Graph Neural Network (GNN) models can be done in a ve
 from torch_geometric.nn import GCN
 from obnb.model_trainer.gnn import SimpleGNNTrainer
 
-# Use 1-dimensional trivial node feature by default
-dataset = OpenBiomedNetBench(root=root, graph_name="BioGRID", label_name="DisGeNET", version=version)
+# Use one-hot encoded log degrees as node features by default
+dataset = OpenBiomedNetBench(root=root, graph_name="BioGRID", label_name="DisGeNET",
+                             auto_generate_feature="OneHotLogDeg", version=version)
 
 # Train and evaluate a GCN
 gcn_mdl = GCN(in_channels=1, hidden_channels=64, num_layers=5, out_channels=n_tasks)
@@ -131,11 +132,13 @@ lsc = data.DisGeNET(root, version=version)
 
 ```python
 from obnb.util.converter import GenePropertyConverter
-from obnb.label.split import RatioHoldout
+from obnb.label.split import RatioPartition
 
-# Load PubMed count gene property converter and use it to set up study-bias holdout split
+# Load PubMed count gene property converter and use it to set up
+# 6/2/2 study-bias based train/val/test splits
 pubmedcnt_converter = GenePropertyConverter(root, name="PubMedCount")
-splitter = RatioHoldout(0.6, 0.4, ascending=False, property_converter=pubmedcnt_converter)
+splitter = RatioPartition(0.6, 0.2, 0.2, ascending=False,
+                          property_converter=pubmedcnt_converter)
 ```
 
 #### Filter labeled data based on network genes and splits

diff --git a/example/label_propagation_studybias_holdout.py b/example/label_propagation_studybias_holdout.py
@@ -1,6 +1,6 @@
 from utils import load_data, print_expected
 
-from obnb import BaseDataset
+from obnb import Dataset
 from obnb.label.split import RatioPartition
 from obnb.metric import auroc
 from obnb.model.label_propagation import OneHopPropagation
@@ -20,18 +20,19 @@
 trainer = LabelPropagationTrainer(metrics, log_level="INFO")
 
 # Evaluate the model for a single task
+# FIX: fix consider_negative
 dataset = Dataset(
     graph=g,
     label=lsc,
     splitter=splitter,
     labelset_name=lsc.label_ids[0],
-    consider_negative=True,
+    consider_negative=False,
 )
 print(trainer.train(mdl, dataset))
 
 # Evaluate the model for all tasks
 dataset = Dataset(graph=g, label=lsc, splitter=splitter)
-results = trainer.fit_and_eval(mdl, dataset, consider_negative=True, reduce="mean")
+results = trainer.fit_and_eval(mdl, dataset, consider_negative=False, reduce="mean")
 print(f"Average train score = {results['train_auroc']:.4f}")
 print(f"Average test score = {results['test_auroc']:.4f}")
 

diff --git a/example/logistic_regression_studybias_holdout.py b/example/logistic_regression_studybias_holdout.py
@@ -21,18 +21,19 @@
 trainer = SupervisedLearningTrainer(metrics, log_level="INFO")
 
 # Train a single model
+# FIX: fix consider_negative
 dataset = Dataset(
     feature=feature,
     label=lsc,
     splitter=splitter,
     labelset_name=lsc.label_ids[0],
-    consider_negative=True,
+    consider_negative=False,
 )
 print(trainer.train(mdl, dataset))
 
 # Evaluate the model for all tasks
 dataset = Dataset(feature=feature, label=lsc, splitter=splitter)
-results = trainer.fit_and_eval(mdl, dataset, consider_negative=True, reduce="mean")
+results = trainer.fit_and_eval(mdl, dataset, consider_negative=False, reduce="mean")
 print(f"Average train score = {results['train_auroc']:.4f}")
 print(f"Average test score = {results['test_auroc']:.4f}")
 

diff --git a/src/obnb/label/collection.py b/src/obnb/label/collection.py
@@ -1,3 +1,4 @@
+import warnings
 from functools import lru_cache
 
 import numpy as np
@@ -425,6 +426,16 @@ def split(  # TODO: Reduce cyclic complexity..
             masks[mask_name] = mask
 
         if consider_negative:
+            warnings.warn(
+                "consider_negative option in LabelsetCollection.split is "
+                "deprecated and will be removed very soon. The usage of this "
+                "option is likely to cause subtle bugs.\nThe consider_negative "
+                "option is replaced by the implicit construction of negatives, "
+                "e.g., by NegativeGeneratorHypergeom. It will be used in the "
+                "form of y_mask from the return of LabelsetCollection.get_y",
+                DeprecationWarning,
+                stacklevel=2,
+            )
             if labelset_name is None:
                 # TODO: option for consider negatives with multiple labelsets
                 raise ValueError(

diff --git a/src/obnb/model_trainer/base.py b/src/obnb/model_trainer/base.py
@@ -139,8 +139,8 @@ def fit_and_eval(
         x = None if dataset.feature is None else dataset.feature.mat
 
         _, _, get_predictions, compute_results = self._setup(dataset, split_idx)
-        pbar = tqdm(enumerate(dataset.label.label_ids), disable=not progress)
-        for i, label_id in pbar:
+        pbar = tqdm(dataset.label.label_ids, disable=not progress)
+        for i, label_id in enumerate(pbar):
             y, masks = dataset.label.split(
                 splitter=dataset.splitter,
                 target_ids=tuple(dataset.idmap.lst),

diff --git a/tutorials/basic_tutorial.ipynb b/tutorials/basic_tutorial.ipynb
@@ -24,7 +24,7 @@
         "colab_type": "text"
       },
       "source": [
-        "<a href=\"https://colab.research.google.com/github/krishnanlab/obnb/blob/tutorial/tutorials/basic_tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+        "<a href=\"https://colab.research.google.com/github/krishnanlab/obnb/blob/main/tutorials/basic_tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
       ]
     },
     {
@@ -90,18 +90,6 @@
       "execution_count": null,
       "outputs": []
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "import obnb.ext.pecanpy\n",
-        "print(f\"Extension for PecanPy installed: {obnb.ext.pecanpy}\")"
-      ],
-      "metadata": {
-        "id": "_ZYMxfgfUZFe"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
     {
       "cell_type": "markdown",
       "source": [
@@ -357,17 +345,6 @@
       "execution_count": null,
       "outputs": []
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "obnb.label.LabelsetCollection"
-      ],
-      "metadata": {
-        "id": "TvKsu8rejken"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
     {
       "cell_type": "markdown",
       "source": [