From 1ffa0cb8fb55a86bbf2e2ed0b3e50a4ddbf85c60 Mon Sep 17 00:00:00 2001 From: Emma Dann Date: Tue, 28 Nov 2023 11:51:15 +0000 Subject: [PATCH] making class --- src/multi_condition_comparisions/tl/de.py | 101 ++++++++-------------- 1 file changed, 38 insertions(+), 63 deletions(-) diff --git a/src/multi_condition_comparisions/tl/de.py b/src/multi_condition_comparisions/tl/de.py index a1fca07..85f806d 100644 --- a/src/multi_condition_comparisions/tl/de.py +++ b/src/multi_condition_comparisions/tl/de.py @@ -9,6 +9,8 @@ from formulaic.model_matrix import ModelMatrix from tqdm.auto import tqdm +from pydeseq2 import DeseqDataSet, DeseqStats, DefaultInference + class BaseMethod(ABC): def __init__( @@ -163,68 +165,41 @@ def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame: return pd.DataFrame(res).sort_values("pvalue") -def _fit_pydeseq2( - adata: AnnData, - design: ArrayLike, - # contrast: ArrayLike, - kwargs** - ) -> pd.DataFrame: - ''' - Fit dds model using pydeseq2. Note: this creates its own adata object for downstream. - - Params: - ------- - adata: AnnData - Annotated data matrix. - design: Union[ArrayLike] - Design matrix with the same number of rows as adata.X. - contrast: ArrayLike - Binary vector specifying cont - kwargs: could be n_cpus, refit_cooks - Returns: - -------- - pd.DataFrame - Differential expression results - ''' - - inference = DefaultInference(n_cpus=3) - # not necessary - # counts_df = pd.DataFrame(adata.X, index= adata.obs_names, columns = adata.var_names) - # metadata = pd.DataFrame(design,index= adata.obs_names, columns = adata.var_names) - dds = DeseqDataSet(adata, design_factors="condition", refit_cooks=True, inference=inference) - dds.obsm['design_matrix'] = pd.DataFrame(design, - index = adata.obs_names.copy()) - #implement correct naming of the columns in design matrix for - # downstream - dds.fit_size_factors() - dds.fit_genewise_dispersions() - dds.fit_dispersion_trend() - dds.fit_dispersion_prior() - dds.fit_LFC() - - if refit_cooks: - dds.calculate_cooks() - dds.refit()# Replace outlier counts - self.dds = dds - - -def _test_contrast_pydeseq2(self, contrast: List[str], alpha = 0.05, - **kwargs) -> pd.DataFrame: - """ - Conduct a specific test and returns a data frame +class PyDESeq2DE(BaseMethod): + """Differential expression test using a PyDESeq2""" - Parameters - ---------- - contrasts: - list of three strings of the form - ["variable", "tested level", "reference level"] - alpha: p value threshold used for controlling fdr with - independent hypothesis weighting - kwargs: extra arguments to pass to DeseqStats() - """ + def fit(kwargs**) -> pd.DataFrame: + ''' + Fit dds model using pydeseq2. Note: this creates its own adata object for downstream. - stat_res = DeseqStats(dds, contrast = contrast, - alpha=alpha,**kwargs) - stat_res.summary() - stat_res.p_values - stat_res.results_df \ No newline at end of file + Params: + ---------- + **kwargs + Keyword arguments specific to DeseqDataSet() + ''' + + inference = DefaultInference(n_cpus=3) + dds = DeseqDataSet(self.adata, design_factors="condition", refit_cooks=True, inference=inference, **kwargs) + dds.obsm['design_matrix'] = pd.DataFrame(self.design, index = self.adata.obs_names.copy()) + #implement correct naming of the columns in design matrix for + # downstream + dds.deseq2() + self.dds = dds + + def _test_single_contrast(self, contrast: List[str], alpha = 0.05, **kwargs) -> pd.DataFrame: + """ + Conduct a specific test and returns a data frame + + Parameters + ---------- + contrasts: + list of three strings of the form + ["variable", "tested level", "reference level"] + alpha: p value threshold used for controlling fdr with + independent hypothesis weighting + kwargs: extra arguments to pass to DeseqStats() + """ + stat_res = DeseqStats(self.dds, contrast = contrast, alpha=alpha, **kwargs) + stat_res.summary() + stat_res.p_values + return pd.DataFrame(stat_res.results_df).sort_values("padj") \ No newline at end of file