diff --git a/README.md b/README.md
index 4a32cf8e..03e8f498 100644
--- a/README.md
+++ b/README.md
@@ -104,8 +104,9 @@ Training and evaluation of Graph Neural Network (GNN) models can be done in a ve
from torch_geometric.nn import GCN
from obnb.model_trainer.gnn import SimpleGNNTrainer
-# Use 1-dimensional trivial node feature by default
-dataset = OpenBiomedNetBench(root=root, graph_name="BioGRID", label_name="DisGeNET", version=version)
+# Use one-hot encoded log degrees as node feature by default
+dataset = OpenBiomedNetBench(root=root, graph_name="BioGRID", label_name="DisGeNET",
+ auto_generate_feature="OneHotLogDeg", version=version)
# Train and evaluate a GCN
gcn_mdl = GCN(in_channels=1, hidden_channels=64, num_layers=5, out_channels=n_tasks)
@@ -131,11 +132,13 @@ lsc = data.DisGeNET(root, version=version)
```python
from obnb.util.converter import GenePropertyConverter
-from obnb.label.split import RatioHoldout
+from obnb.label.split import RatioPartition
-# Load PubMed count gene property converter and use it to set up study-bias holdout split
+# Load PubMed count gene property converter and use it to set up
+# 6/2/2 study-bias based train/val/test splits
pubmedcnt_converter = GenePropertyConverter(root, name="PubMedCount")
-splitter = RatioHoldout(0.6, 0.4, ascending=False, property_converter=pubmedcnt_converter)
+splitter = RatioPartition(0.6, 0.2, 0.2, ascending=False,
+ property_converter=pubmedcnt_converter)
```
#### Filter labeled data based on network genes and splits
diff --git a/example/label_propagation_studybias_holdout.py b/example/label_propagation_studybias_holdout.py
index 644ca76c..b6d7f187 100644
--- a/example/label_propagation_studybias_holdout.py
+++ b/example/label_propagation_studybias_holdout.py
@@ -1,6 +1,6 @@
from utils import load_data, print_expected
-from obnb import BaseDataset
+from obnb import Dataset
from obnb.label.split import RatioPartition
from obnb.metric import auroc
from obnb.model.label_propagation import OneHopPropagation
@@ -20,18 +20,19 @@
trainer = LabelPropagationTrainer(metrics, log_level="INFO")
# Evaluate the model for a single task
+# FIX: disable the deprecated consider_negative option
dataset = Dataset(
graph=g,
label=lsc,
splitter=splitter,
labelset_name=lsc.label_ids[0],
- consider_negative=True,
+ consider_negative=False,
)
print(trainer.train(mdl, dataset))
# Evaluate the model for all tasks
dataset = Dataset(graph=g, label=lsc, splitter=splitter)
-results = trainer.fit_and_eval(mdl, dataset, consider_negative=True, reduce="mean")
+results = trainer.fit_and_eval(mdl, dataset, consider_negative=False, reduce="mean")
print(f"Average train score = {results['train_auroc']:.4f}")
print(f"Average test score = {results['test_auroc']:.4f}")
diff --git a/example/logistic_regression_studybias_holdout.py b/example/logistic_regression_studybias_holdout.py
index cfa8de85..3a108ec6 100644
--- a/example/logistic_regression_studybias_holdout.py
+++ b/example/logistic_regression_studybias_holdout.py
@@ -21,18 +21,19 @@
trainer = SupervisedLearningTrainer(metrics, log_level="INFO")
# Train a single model
+# FIX: disable the deprecated consider_negative option
dataset = Dataset(
feature=feature,
label=lsc,
splitter=splitter,
labelset_name=lsc.label_ids[0],
- consider_negative=True,
+ consider_negative=False,
)
print(trainer.train(mdl, dataset))
# Evaluate the model for all tasks
dataset = Dataset(feature=feature, label=lsc, splitter=splitter)
-results = trainer.fit_and_eval(mdl, dataset, consider_negative=True, reduce="mean")
+results = trainer.fit_and_eval(mdl, dataset, consider_negative=False, reduce="mean")
print(f"Average train score = {results['train_auroc']:.4f}")
print(f"Average test score = {results['test_auroc']:.4f}")
diff --git a/src/obnb/label/collection.py b/src/obnb/label/collection.py
index 3c8208f8..d75ed1c1 100644
--- a/src/obnb/label/collection.py
+++ b/src/obnb/label/collection.py
@@ -1,3 +1,4 @@
+import warnings
from functools import lru_cache
import numpy as np
@@ -425,6 +426,16 @@ def split( # TODO: Reduce cyclic complexity..
masks[mask_name] = mask
if consider_negative:
+ warnings.warn(
+ "consider_negative option in LabelsetCollection.split is "
+ "deprecated and will be removed very soon. The usage of this "
"option is likely to cause subtle bugs.\nThe consider_negative "
+ "option is replaced by the implicit construction of negatives, "
+ "e.g., by NegativeGeneratorHypergeom. It will be used in the "
+ "form of y_mask from the return of LabelsetCollection.get_y",
+ DeprecationWarning,
+ stacklevel=2,
+ )
if labelset_name is None:
# TODO: option for consider negatives with multiple labelsets
raise ValueError(
diff --git a/src/obnb/model_trainer/base.py b/src/obnb/model_trainer/base.py
index bbe78af2..e80a1f65 100644
--- a/src/obnb/model_trainer/base.py
+++ b/src/obnb/model_trainer/base.py
@@ -139,8 +139,8 @@ def fit_and_eval(
x = None if dataset.feature is None else dataset.feature.mat
_, _, get_predictions, compute_results = self._setup(dataset, split_idx)
- pbar = tqdm(enumerate(dataset.label.label_ids), disable=not progress)
- for i, label_id in pbar:
+ pbar = tqdm(dataset.label.label_ids, disable=not progress)
+ for i, label_id in enumerate(pbar):
y, masks = dataset.label.split(
splitter=dataset.splitter,
target_ids=tuple(dataset.idmap.lst),
diff --git a/tutorials/basic_tutorial.ipynb b/tutorials/basic_tutorial.ipynb
index adf71abf..611ff4d6 100644
--- a/tutorials/basic_tutorial.ipynb
+++ b/tutorials/basic_tutorial.ipynb
@@ -24,7 +24,7 @@
"colab_type": "text"
},
"source": [
- ""
+ ""
]
},
{
@@ -90,18 +90,6 @@
"execution_count": null,
"outputs": []
},
- {
- "cell_type": "code",
- "source": [
- "import obnb.ext.pecanpy\n",
- "print(f\"Extension for PecanPy installed: {obnb.ext.pecanpy}\")"
- ],
- "metadata": {
- "id": "_ZYMxfgfUZFe"
- },
- "execution_count": null,
- "outputs": []
- },
{
"cell_type": "markdown",
"source": [
@@ -357,17 +345,6 @@
"execution_count": null,
"outputs": []
},
- {
- "cell_type": "code",
- "source": [
- "obnb.label.LabelsetCollection"
- ],
- "metadata": {
- "id": "TvKsu8rejken"
- },
- "execution_count": null,
- "outputs": []
- },
{
"cell_type": "markdown",
"source": [