Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

minor fixes to notebook and readme #452

Merged
merged 7 commits into from
Jul 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ Training and evaluation of Graph Neural Network (GNN) models can be done in a ve
from torch_geometric.nn import GCN
from obnb.model_trainer.gnn import SimpleGNNTrainer

# Use 1-dimensional trivial node feature by default
dataset = OpenBiomedNetBench(root=root, graph_name="BioGRID", label_name="DisGeNET", version=version)
# Use one-hot encoded log degrees as node features by default
dataset = OpenBiomedNetBench(root=root, graph_name="BioGRID", label_name="DisGeNET",
auto_generate_feature="OneHotLogDeg", version=version)

# Train and evaluate a GCN
gcn_mdl = GCN(in_channels=1, hidden_channels=64, num_layers=5, out_channels=n_tasks)
Expand All @@ -131,11 +132,13 @@ lsc = data.DisGeNET(root, version=version)

```python
from obnb.util.converter import GenePropertyConverter
from obnb.label.split import RatioHoldout
from obnb.label.split import RatioPartition

# Load PubMed count gene property converter and use it to set up study-bias holdout split
# Load PubMed count gene property converter and use it to set up
# 6/2/2 study-bias based train/val/test splits
pubmedcnt_converter = GenePropertyConverter(root, name="PubMedCount")
splitter = RatioHoldout(0.6, 0.4, ascending=False, property_converter=pubmedcnt_converter)
splitter = RatioPartition(0.6, 0.2, 0.2, ascending=False,
property_converter=pubmedcnt_converter)
```

#### Filter labeled data based on network genes and splits
Expand Down
7 changes: 4 additions & 3 deletions example/label_propagation_studybias_holdout.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from utils import load_data, print_expected

from obnb import BaseDataset
from obnb import Dataset
from obnb.label.split import RatioPartition
from obnb.metric import auroc
from obnb.model.label_propagation import OneHopPropagation
Expand All @@ -20,18 +20,19 @@
trainer = LabelPropagationTrainer(metrics, log_level="INFO")

# Evaluate the model for a single task
# FIX: fix consider_negative
dataset = Dataset(
graph=g,
label=lsc,
splitter=splitter,
labelset_name=lsc.label_ids[0],
consider_negative=True,
consider_negative=False,
)
print(trainer.train(mdl, dataset))

# Evaluate the model for all tasks
dataset = Dataset(graph=g, label=lsc, splitter=splitter)
results = trainer.fit_and_eval(mdl, dataset, consider_negative=True, reduce="mean")
results = trainer.fit_and_eval(mdl, dataset, consider_negative=False, reduce="mean")
print(f"Average train score = {results['train_auroc']:.4f}")
print(f"Average test score = {results['test_auroc']:.4f}")

Expand Down
5 changes: 3 additions & 2 deletions example/logistic_regression_studybias_holdout.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,19 @@
trainer = SupervisedLearningTrainer(metrics, log_level="INFO")

# Train a single model
# FIX: fix consider_negative
dataset = Dataset(
feature=feature,
label=lsc,
splitter=splitter,
labelset_name=lsc.label_ids[0],
consider_negative=True,
consider_negative=False,
)
print(trainer.train(mdl, dataset))

# Evaluate the model for all tasks
dataset = Dataset(feature=feature, label=lsc, splitter=splitter)
results = trainer.fit_and_eval(mdl, dataset, consider_negative=True, reduce="mean")
results = trainer.fit_and_eval(mdl, dataset, consider_negative=False, reduce="mean")
print(f"Average train score = {results['train_auroc']:.4f}")
print(f"Average test score = {results['test_auroc']:.4f}")

Expand Down
11 changes: 11 additions & 0 deletions src/obnb/label/collection.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from functools import lru_cache

import numpy as np
Expand Down Expand Up @@ -425,6 +426,16 @@ def split( # TODO: Reduce cyclic complexity..
masks[mask_name] = mask

if consider_negative:
warnings.warn(
"consider_negative option in LabelsetCollection.split is "
"deprecated and will be removed very soon. The usage of this "
"option is likely to cause subtle bugs.\nThe consider_negative "
"option is replaced by the implicit construction of negatives, "
"e.g., by NegativeGeneratorHypergeom. It will be used in the "
"form of y_mask from the return of LabelsetCollection.get_y",
DeprecationWarning,
stacklevel=2,
)
if labelset_name is None:
# TODO: option for consider negatives with multiple labelsets
raise ValueError(
Expand Down
4 changes: 2 additions & 2 deletions src/obnb/model_trainer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ def fit_and_eval(
x = None if dataset.feature is None else dataset.feature.mat

_, _, get_predictions, compute_results = self._setup(dataset, split_idx)
pbar = tqdm(enumerate(dataset.label.label_ids), disable=not progress)
for i, label_id in pbar:
pbar = tqdm(dataset.label.label_ids, disable=not progress)
for i, label_id in enumerate(pbar):
y, masks = dataset.label.split(
splitter=dataset.splitter,
target_ids=tuple(dataset.idmap.lst),
Expand Down
25 changes: 1 addition & 24 deletions tutorials/basic_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/krishnanlab/obnb/blob/tutorial/tutorials/basic_tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
"<a href=\"https://colab.research.google.com/github/krishnanlab/obnb/blob/main/tutorials/basic_tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
Expand Down Expand Up @@ -90,18 +90,6 @@
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import obnb.ext.pecanpy\n",
"print(f\"Extension for PecanPy installed: {obnb.ext.pecanpy}\")"
],
"metadata": {
"id": "_ZYMxfgfUZFe"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
Expand Down Expand Up @@ -357,17 +345,6 @@
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"obnb.label.LabelsetCollection"
],
"metadata": {
"id": "TvKsu8rejken"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
Expand Down