Skip to content
This repository has been archived by the owner on May 27, 2024. It is now read-only.

Commit

Permalink
making class
Browse files Browse the repository at this point in the history
  • Loading branch information
emdann committed Nov 28, 2023
1 parent e36405a commit 1ffa0cb
Showing 1 changed file with 38 additions and 63 deletions.
101 changes: 38 additions & 63 deletions src/multi_condition_comparisions/tl/de.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from formulaic.model_matrix import ModelMatrix
from tqdm.auto import tqdm

from pydeseq2 import DeseqDataSet, DeseqStats, DefaultInference


class BaseMethod(ABC):
def __init__(
Expand Down Expand Up @@ -163,68 +165,41 @@ def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame:
return pd.DataFrame(res).sort_values("pvalue")


def _fit_pydeseq2(
adata: AnnData,
design: ArrayLike,
# contrast: ArrayLike,
kwargs**
) -> pd.DataFrame:
'''
Fit dds model using pydeseq2. Note: this creates its own adata object for downstream.
Params:
-------
adata: AnnData
Annotated data matrix.
design: Union[ArrayLike]
Design matrix with the same number of rows as adata.X.
contrast: ArrayLike
Binary vector specifying cont
kwargs: could be n_cpus, refit_cooks
Returns:
--------
pd.DataFrame
Differential expression results
'''

inference = DefaultInference(n_cpus=3)
# not necessary
# counts_df = pd.DataFrame(adata.X, index= adata.obs_names, columns = adata.var_names)
# metadata = pd.DataFrame(design,index= adata.obs_names, columns = adata.var_names)
dds = DeseqDataSet(adata, design_factors="condition", refit_cooks=True, inference=inference)
dds.obsm['design_matrix'] = pd.DataFrame(design,
index = adata.obs_names.copy())
#implement correct naming of the columns in design matrix for
# downstream
dds.fit_size_factors()
dds.fit_genewise_dispersions()
dds.fit_dispersion_trend()
dds.fit_dispersion_prior()
dds.fit_LFC()

if refit_cooks:
dds.calculate_cooks()
dds.refit()# Replace outlier counts
self.dds = dds


def _test_contrast_pydeseq2(self, contrast: List[str], alpha = 0.05,
**kwargs) -> pd.DataFrame:
"""
Conduct a specific test and returns a data frame
class PyDESeq2DE(BaseMethod):
"""Differential expression test using a PyDESeq2"""

Parameters
----------
contrasts:
list of three strings of the form
["variable", "tested level", "reference level"]
alpha: p value threshold used for controlling fdr with
independent hypothesis weighting
kwargs: extra arguments to pass to DeseqStats()
"""
def fit(kwargs**) -> pd.DataFrame:
'''
Fit dds model using pydeseq2. Note: this creates its own adata object for downstream.
stat_res = DeseqStats(dds, contrast = contrast,
alpha=alpha,**kwargs)
stat_res.summary()
stat_res.p_values
stat_res.results_df
Params:
----------
**kwargs
Keyword arguments specific to DeseqDataSet()
'''

inference = DefaultInference(n_cpus=3)
dds = DeseqDataSet(self.adata, design_factors="condition", refit_cooks=True, inference=inference, **kwargs)
dds.obsm['design_matrix'] = pd.DataFrame(self.design, index = self.adata.obs_names.copy())
#implement correct naming of the columns in design matrix for
# downstream
dds.deseq2()
self.dds = dds

def _test_single_contrast(self, contrast: List[str], alpha = 0.05, **kwargs) -> pd.DataFrame:
"""
Conduct a specific test and returns a data frame
Parameters
----------
contrasts:
list of three strings of the form
["variable", "tested level", "reference level"]
alpha: p value threshold used for controlling fdr with
independent hypothesis weighting
kwargs: extra arguments to pass to DeseqStats()
"""
stat_res = DeseqStats(self.dds, contrast = contrast, alpha=alpha, **kwargs)
stat_res.summary()
stat_res.p_values
return pd.DataFrame(stat_res.results_df).sort_values("padj")

0 comments on commit 1ffa0cb

Please sign in to comment.