diff --git a/README.md b/README.md index 4a32cf8e..03e8f498 100644 --- a/README.md +++ b/README.md @@ -104,8 +104,9 @@ Training and evaluation of Graph Neural Network (GNN) models can be done in a ve from torch_geometric.nn import GCN from obnb.model_trainer.gnn import SimpleGNNTrainer -# Use 1-dimensional trivial node feature by default -dataset = OpenBiomedNetBench(root=root, graph_name="BioGRID", label_name="DisGeNET", version=version) +# Use one-hot encoded log degrees as node feature by default +dataset = OpenBiomedNetBench(root=root, graph_name="BioGRID", label_name="DisGeNET", + auto_generate_feature="OneHotLogDeg", version=version) # Train and evaluate a GCN gcn_mdl = GCN(in_channels=1, hidden_channels=64, num_layers=5, out_channels=n_tasks) @@ -131,11 +132,13 @@ lsc = data.DisGeNET(root, version=version) ```python from obnb.util.converter import GenePropertyConverter -from obnb.label.split import RatioHoldout +from obnb.label.split import RatioPartition -# Load PubMed count gene property converter and use it to set up study-bias holdout split +# Load PubMed count gene property converter and use it to set up +# 6/2/2 study-bias based train/val/test splits pubmedcnt_converter = GenePropertyConverter(root, name="PubMedCount") -splitter = RatioHoldout(0.6, 0.4, ascending=False, property_converter=pubmedcnt_converter) +splitter = RatioPartition(0.6, 0.2, 0.2, ascending=False, + property_converter=pubmedcnt_converter) ``` #### Filter labeled data based on network genes and splits diff --git a/example/label_propagation_studybias_holdout.py b/example/label_propagation_studybias_holdout.py index 644ca76c..b6d7f187 100644 --- a/example/label_propagation_studybias_holdout.py +++ b/example/label_propagation_studybias_holdout.py @@ -1,6 +1,6 @@ from utils import load_data, print_expected -from obnb import BaseDataset +from obnb import Dataset from obnb.label.split import RatioPartition from obnb.metric import auroc from obnb.model.label_propagation import 
OneHopPropagation @@ -20,18 +20,19 @@ trainer = LabelPropagationTrainer(metrics, log_level="INFO") # Evaluate the model for a single task +# FIX: fix consider_negative dataset = Dataset( graph=g, label=lsc, splitter=splitter, labelset_name=lsc.label_ids[0], - consider_negative=True, + consider_negative=False, ) print(trainer.train(mdl, dataset)) # Evaluate the model for all tasks dataset = Dataset(graph=g, label=lsc, splitter=splitter) -results = trainer.fit_and_eval(mdl, dataset, consider_negative=True, reduce="mean") +results = trainer.fit_and_eval(mdl, dataset, consider_negative=False, reduce="mean") print(f"Average train score = {results['train_auroc']:.4f}") print(f"Average test score = {results['test_auroc']:.4f}") diff --git a/example/logistic_regression_studybias_holdout.py b/example/logistic_regression_studybias_holdout.py index cfa8de85..3a108ec6 100644 --- a/example/logistic_regression_studybias_holdout.py +++ b/example/logistic_regression_studybias_holdout.py @@ -21,18 +21,19 @@ trainer = SupervisedLearningTrainer(metrics, log_level="INFO") # Train a single model +# FIX: fix consider_negative dataset = Dataset( feature=feature, label=lsc, splitter=splitter, labelset_name=lsc.label_ids[0], - consider_negative=True, + consider_negative=False, ) print(trainer.train(mdl, dataset)) # Evaluate the model for all tasks dataset = Dataset(feature=feature, label=lsc, splitter=splitter) -results = trainer.fit_and_eval(mdl, dataset, consider_negative=True, reduce="mean") +results = trainer.fit_and_eval(mdl, dataset, consider_negative=False, reduce="mean") print(f"Average train score = {results['train_auroc']:.4f}") print(f"Average test score = {results['test_auroc']:.4f}") diff --git a/src/obnb/label/collection.py b/src/obnb/label/collection.py index 3c8208f8..d75ed1c1 100644 --- a/src/obnb/label/collection.py +++ b/src/obnb/label/collection.py @@ -1,3 +1,4 @@ +import warnings from functools import lru_cache import numpy as np @@ -425,6 +426,16 @@ def split( # TODO: 
Reduce cyclic complexity.. masks[mask_name] = mask if consider_negative: + warnings.warn( + "consider_negative option in LabelsetCollection.split is " + "deprecated and will be removed very soon. The usage of this " + "option is likely to cause subtle bugs.\nThe consider_negative " + "option is replaced by the implicit construction of negatives, " + "e.g., by NegativeGeneratorHypergeom. It will be used in the " + "form of y_mask from the return of LabelsetCollection.get_y", + DeprecationWarning, + stacklevel=2, + ) if labelset_name is None: # TODO: option for consider negatives with multiple labelsets raise ValueError( diff --git a/src/obnb/model_trainer/base.py b/src/obnb/model_trainer/base.py index bbe78af2..e80a1f65 100644 --- a/src/obnb/model_trainer/base.py +++ b/src/obnb/model_trainer/base.py @@ -139,8 +139,8 @@ def fit_and_eval( x = None if dataset.feature is None else dataset.feature.mat _, _, get_predictions, compute_results = self._setup(dataset, split_idx) - pbar = tqdm(enumerate(dataset.label.label_ids), disable=not progress) - for i, label_id in pbar: + pbar = tqdm(dataset.label.label_ids, disable=not progress) + for i, label_id in enumerate(pbar): y, masks = dataset.label.split( splitter=dataset.splitter, target_ids=tuple(dataset.idmap.lst), diff --git a/tutorials/basic_tutorial.ipynb b/tutorials/basic_tutorial.ipynb index adf71abf..611ff4d6 100644 --- a/tutorials/basic_tutorial.ipynb +++ b/tutorials/basic_tutorial.ipynb @@ -24,7 +24,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -90,18 +90,6 @@ "execution_count": null, "outputs": [] }, - { - "cell_type": "code", - "source": [ - "import obnb.ext.pecanpy\n", - "print(f\"Extension for PecanPy installed: {obnb.ext.pecanpy}\")" - ], - "metadata": { - "id": "_ZYMxfgfUZFe" - }, - "execution_count": null, - "outputs": [] - }, { "cell_type": "markdown", "source": [ @@ -357,17 +345,6 @@ "execution_count": null, "outputs": [] }, - { - "cell_type": "code", - "source": [ - 
"obnb.label.LabelsetCollection" - ], - "metadata": { - "id": "TvKsu8rejken" - }, - "execution_count": null, - "outputs": [] - }, { "cell_type": "markdown", "source": [