From bd7d5d4035f5f32c77d38e2744a0f0bc1c58482d Mon Sep 17 00:00:00 2001 From: suzannejin Date: Thu, 24 Oct 2024 14:14:14 +0200 Subject: [PATCH 1/3] fix bug with number_of_cutoffs in propd.R --- modules/local/propr/propd/templates/propd.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/propr/propd/templates/propd.R b/modules/local/propr/propd/templates/propd.R index 2467b398..6954d636 100644 --- a/modules/local/propr/propd/templates/propd.R +++ b/modules/local/propr/propd/templates/propd.R @@ -469,7 +469,7 @@ if (opt\$permutation == 0) { pd <- updateCutoffs( pd, - number_of_cutoffs = 100, + number_of_cutoffs = opt\$number_of_cutoffs, ncores = opt\$ncores ) From 8db59cc4be5419b70044ee6de3e5d53b147445d9 Mon Sep 17 00:00:00 2001 From: suzannejin Date: Fri, 25 Oct 2024 13:08:16 +0200 Subject: [PATCH 2/3] update toolsheet --- assets/tools_samplesheet.csv | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/assets/tools_samplesheet.csv b/assets/tools_samplesheet.csv index efdf5457..1345355a 100644 --- a/assets/tools_samplesheet.csv +++ b/assets/tools_samplesheet.csv @@ -1,8 +1,10 @@ pathway_name,diff_method,args_diff,cor_method,args_cor,enr_method,args_enr propd,propd,,,,, propd_fdr,propd,--permutation 100,,,, +propd_grea,propd,,,,grea, +propd_ora,propd,,,,gprofiler2, +deseq2_gsea,deseq2,,,,gsea, +deseq2_ora,deseq2,,,,gprofiler2, pcorbshrink,,,propr,--metric pcor.bshrink,, propr,,,propr,--metric rho,, cor,,,propr,--metric cor,, -propd_grea,propd,,,,grea, -deseq2,deseq2,,,,gsea, From dab58e5a6f7bec585f1c332680543759908dc5bf Mon Sep 17 00:00:00 2001 From: suzannejin Date: Fri, 25 Oct 2024 13:10:53 +0200 Subject: [PATCH 3/3] updated propr container and modified propd.R to loop through FDR testing when no meaningful cutoff is found --- conf/test_experimental.config | 6 ++- modules/local/propr/grea/main.nf | 4 +- modules/local/propr/propd/main.nf | 4 +- modules/local/propr/propd/templates/propd.R | 53 ++++++++++++++++----- modules/local/propr/propr/main.nf | 4 +- subworkflows/local/differential/main.nf | 3 ++ 6 files changed, 55 insertions(+), 19 deletions(-) diff --git a/conf/test_experimental.config b/conf/test_experimental.config index 56ca6a2e..8c97d31c 100644 --- a/conf/test_experimental.config +++ b/conf/test_experimental.config @@ -29,7 +29,11 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv' matrix = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv' contrasts = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.contrasts.csv' - pathway = "propd,propd_fdr,pcorbshrink,propr,cor,propd_grea" + pathway = "propd,propd_fdr,propd_grea,propr" + + // some pathways are not included in the test, because they don't make sense to test for this dataset + // for example propd_fdr don't give significant results on this dataset, and the permutation will be scaped + // and pcor.bshrink is taking too long for this test (15min), and it should be tested on matrices with n > p, or n close to p. (eg. specific pathways, or on DE genes only, etc) //Features features_metadata_cols = 'gene_id,gene_name' diff --git a/modules/local/propr/grea/main.nf b/modules/local/propr/grea/main.nf index 4c37cc58..2b503133 100644 --- a/modules/local/propr/grea/main.nf +++ b/modules/local/propr/grea/main.nf @@ -4,8 +4,8 @@ process PROPR_GREA { // conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:75e6dfd62a2313a9': - 'community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:fbd569ab00953cb0' }" + 'oras://community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:209490acb0e524e3' : + 'community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:17abd3f137436739' }" input: tuple val(meta), path(adj) diff --git a/modules/local/propr/propd/main.nf b/modules/local/propr/propd/main.nf index 7f3c7f94..8bfd9e3a 100644 --- a/modules/local/propr/propd/main.nf +++ b/modules/local/propr/propd/main.nf @@ -4,8 +4,8 @@ process PROPR_PROPD { // conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:75e6dfd62a2313a9': - 'community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:fbd569ab00953cb0' }" + 'oras://community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:209490acb0e524e3' : + 'community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:17abd3f137436739' }" input: tuple val(meta), path(count), path(samplesheet), val(contrast_variable), val(reference), val(target) diff --git a/modules/local/propr/propd/templates/propd.R b/modules/local/propr/propd/templates/propd.R index 6954d636..c4aae21a 100644 --- a/modules/local/propr/propd/templates/propd.R +++ b/modules/local/propr/propd/templates/propd.R @@ -465,21 +465,50 @@ if (opt\$permutation == 0) { warning('Permutation tests are used to compute FDR values.') - # update FDR values using permutation tests + # calculate FDR values using permutation tests + # this part will call the updateCutoffs function iteratively + # as far as it does not find a meaningful theta value + # and does not reach the maximum number of iterations + + theta_cutoff <- FALSE + max_cutoff <- 1 + ntry <- 0 + while (!theta_cutoff & max_cutoff > 0 & ntry < 10) { + ntry <- ntry + 1 + + # get theta cutoffs to test the FDR + + if (ntry > 1) { + part <- pd@fdr[which(pd@fdr\$truecounts > 0),] + if (nrow(part) > 1) { + max_cutoff <- min(part\$cutoff) + } else { + break + } + } - pd <- updateCutoffs( - pd, - number_of_cutoffs = opt\$number_of_cutoffs, - ncores = opt\$ncores - ) + cutoffs <- as.numeric(quantile( + pd@results[pd@results\$theta < max_cutoff, 'theta'], + seq(0, 1, length.out = opt\$number_of_cutoffs) + )) - # get theta cutoff + # update FDR values + + pd <- updateCutoffs( + pd, + custom_cutoffs = cutoffs, + ncores = opt\$ncores + ) + + # check if any theta value has FDR below desired threshold + + theta_cutoff <- getCutoffFDR( + pd, + fdr=opt\$fdr, + window_size=1 + ) + } - theta_cutoff <- getCutoffFDR( - pd, - fdr=opt\$fdr, - window_size=1 - ) if (theta_cutoff) { warning('Significant theta value found: ', theta_cutoff) diff --git a/modules/local/propr/propr/main.nf b/modules/local/propr/propr/main.nf index efe25569..99e7770e 100644 --- a/modules/local/propr/propr/main.nf +++ b/modules/local/propr/propr/main.nf @@ -4,8 +4,8 @@ process PROPR_PROPR { // conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:75e6dfd62a2313a9': - 'community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:fbd569ab00953cb0' }" + 'oras://community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:209490acb0e524e3' : + 'community.wave.seqera.io/library/bioconductor-limma_r-ggplot2_r-propr:17abd3f137436739' }" input: tuple val(meta), path(count) diff --git a/subworkflows/local/differential/main.nf b/subworkflows/local/differential/main.nf index 2cc67427..bd45178d 100644 --- a/subworkflows/local/differential/main.nf +++ b/subworkflows/local/differential/main.nf @@ -39,6 +39,9 @@ workflow DIFFERENTIAL { // Perform differential analysis with propd // ---------------------------------------------------- + // TODO propd currently don't support blocking, so we should not run propd with same contrast_variable, reference and target, + // but different blocking variable, since it will simply run the same analysis again. + ch_counts .join(ch_samplesheet) .combine(ch_contrasts)