Skip to content
This repository has been archived by the owner on May 27, 2024. It is now read-only.

Commit

Permalink
Merge branch 'main' into edger-class
Browse files Browse the repository at this point in the history
  • Loading branch information
hwhitfield authored Nov 28, 2023
2 parents fe17448 + b11593a commit 38fcf64
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 14 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pip install multi-condition-comparisions
1. Install the latest development version:

```bash
pip install git+https://github.com/grst/multi-condition-comparisions.git@main
pip install git+https://github.com/scverse/multi-condition-comparisions.git@main
```

## Release notes
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ urls.Home-page = "https://github.com/scverse/multi-condition-comparisons"
dependencies = [
"anndata",
"formulaic",
"pandas",
"pydeseq2",
"scanpy",
"rpy2",
Expand Down
49 changes: 41 additions & 8 deletions src/multi_condition_comparisions/tl/de.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
import scanpy as sc
import statsmodels.regression.linear_model
import statsmodels.api as sm
from anndata import AnnData
from formulaic import model_matrix
from formulaic.model_matrix import ModelMatrix
Expand All @@ -16,7 +16,12 @@

class BaseMethod(ABC):
def __init__(
self, adata: AnnData, design: str | np.ndarray, mask: str | None = None, layer: str | None = None, **kwargs
self,
adata: AnnData,
design: str | np.ndarray,
mask: str | None = None,
layer: str | None = None,
**kwargs,
):
"""
Initialize the method
Expand All @@ -28,7 +33,9 @@ def __init__(
design
Model design. Can be either a design matrix or a formulaic formula.
mask
a column in adata.var that contains a boolean mask with selected features.
A column in adata.var that contains a boolean mask with selected features.
layer
Layer to use in fit(). If None, use the X matrix.
**kwargs
Keyword arguments specific to the method implementation
"""
Expand Down Expand Up @@ -141,12 +148,36 @@ def contrast(self, column: str, baseline: str, group_to_compare: str) -> np.ndar
class StatsmodelsDE(BaseMethod):
"""Differential expression test using a statsmodels linear regression"""

def fit(self):
"""Fit the OLS model"""
def fit(
self,
regression_model: sm.OLS | sm.GLM = sm.OLS,
**kwargs,
) -> None:
"""
Fit the specified regression model.
Parameters
----------
regression_model
A statsmodels regression model class, either OLS or GLM. Defaults to OLS.
**kwargs
Additional arguments passed to the constructor of the regression model (not to
its fit() call). In particular, this is where you can specify the family for GLM.
Example
-------
>>> import statsmodels.api as sm
>>> model = StatsmodelsDE(adata, design="~condition")
>>> model.fit(sm.GLM, family=sm.families.NegativeBinomial(link=sm.families.links.Log()))
>>> results = model.test_contrasts(np.array([0, 1]))
"""
self.models = []
for var in tqdm(self.adata.var_names):
mod = statsmodels.regression.linear_model.OLS(
sc.get.obs_df(self.adata, keys=[var], layer=self.layer)[var], self.design
mod = regression_model(
sc.get.obs_df(self.adata, keys=[var], layer=self.layer)[var],
self.design,
**kwargs,
)
mod = mod.fit()
self.models.append(mod)
Expand All @@ -164,7 +195,8 @@ def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame:
"fold_change": t_test.effect.item(),
}
)
return pd.DataFrame(res).sort_values("pvalue")

return pd.DataFrame(res).sort_values("pvalue").set_index("variable")

class EdgeRDE(BaseMethod):
"""Differential expression test using EdgeR"""
Expand Down Expand Up @@ -312,3 +344,4 @@ def _test_single_contrast(self, contrast: List[str]) -> pd.DataFrame:

return de_res


30 changes: 25 additions & 5 deletions tests/test_de.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import anndata as ad
import numpy as np
import pytest
import statsmodels.api as sm
from pandas import testing as tm
from pydeseq2.utils import load_example_data

import multi_condition_comparisions
Expand Down Expand Up @@ -28,11 +30,29 @@ def test_adata():
return ad.AnnData(X=counts, obs=metadata)


@pytest.mark.parametrize("method_class", [StatsmodelsDE])
def test_de(test_adata, method_class: BaseMethod):
"""Check that the method can be initialized and fitted and that the test_contrast
method returns a dataframe with the correct number of rows"""
@pytest.mark.parametrize(
"method_class,kwargs",
[
# OLS
(StatsmodelsDE, {}),
# Negative Binomial
(
StatsmodelsDE,
{"regression_model": sm.GLM, "family": sm.families.NegativeBinomial()},
),
],
)
def test_de(test_adata, method_class: BaseMethod, kwargs):
"""Check that the method can be initialized and fitted, and perform basic checks on
the result of test_contrasts."""
method = method_class(adata=test_adata, design="~condition")
method.fit()
method.fit(**kwargs)
res_df = method.test_contrasts(np.array([0, 1]))
# Check that the result has the correct number of rows
assert len(res_df) == test_adata.n_vars
# Check that the index of the result matches the var_names of the adata
tm.assert_index_equal(test_adata.var_names, res_df.index, check_order=False, check_names=False)
# Check that there is a p-value column
assert "pvalue" in res_df.columns
# Check that p-values are between 0 and 1
assert np.all((0 <= res_df["pvalue"]) & (res_df["pvalue"] <= 1))

0 comments on commit 38fcf64

Please sign in to comment.