Merge pull request #285 from bjlang/dev-ratio

Dev ratio
nf-core · Sep 10, 2024 · 9421df1 · 9421df1
2 parents 6e6a740 + 318706a
commit 9421df1
Show file tree

Hide file tree

Showing 9 changed files with 120 additions and 101 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -32,6 +32,7 @@ jobs:
           - "test_affy"
           - "test_maxquant"
           - "test_soft"
+          - "test_experimental"
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

diff --git a/assets/tools_samplesheet.csv b/assets/tools_samplesheet.csv
@@ -1,7 +1,6 @@
 pathway_name,diff_method,args_diff,enr_diff_method,args_enr_diff,cor_method,args_cor,enr_cor_method,args_enr_cor,sel_method,args_sel
-diff_prop,propd,--group_col fase --adjacency true --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --permutation 100 --fixseed true, , ,,, , ,,
-diff_prop_noperm,propd,--group_col fase --adjacency true --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --permutation 0 --fixseed true, , ,,, , ,,
-filtered_pcor,propd,--group_col fase --adjacency true --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --permutation 100 --fixseed true, , ,propr,  --permutation 10 --adjacency true --cutoff_min 0.005 --cutoff_max 0.5 --cutoff_interval 0.01 --metric pcor.bshrink, , , filtervar,
-prop,, , , ,propr,  --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --fixseed true --metric rho --permutation 100 --adjacency true, , , ,
-diff_grea,propd,--group_col fase --adjacency true --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --permutation 10 --fixseed true,grea, --permutation 10, , , , ,,
-deseq2,deseq2,,gsea,,,,,,,
+diff_prop,propd,--adjacency true --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --permutation 100 --fixseed true,,,,,,,,
+diff_prop_noperm,propd,--cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --permutation 0 --fixseed true,,,,,,,,
+filtered_pcor,propd,--adjacency true --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --permutation 100 --fixseed true,,,propr,--permutation 10 --adjacency true --cutoff_min 0.005 --cutoff_max 0.5 --cutoff_interval 0.01 --metric pcor.bshrink,,,filtervar,
+prop,,,,,propr,--cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --fixseed true --metric rho --permutation 100 --adjacency true,,,,
+diff_grea,propd,--adjacency true --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --permutation 10 --fixseed true,grea,--permutation 10,,,,,,
diff --git a/conf/modules_coda.config b/conf/modules_coda.config
diff --git a/conf/test_experimental.config b/conf/test_experimental.config
@@ -0,0 +1,45 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests of the experimental mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple
+    pipeline test of the CoDA experimental mode.
+
+    Use as follows:
+        nextflow run nf-core/differentialabundance -profile test_experimental,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    study_name = 'SRP254919'
+    study_type = 'experimental'
+    config_profile_name        = 'Test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv'
+    matrix =  'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv'
+    contrasts =  'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.contrasts.csv'
+    tools = "$projectDir/assets/tools_samplesheet.csv"
+    pathway = "diff_prop,diff_prop_noperm,filtered_pcor,prop"
+
+    //Features
+    features_metadata_cols      = 'gene_id,gene_name'
+
+    // Observations
+    observations_id_col        = 'sample'
+    observations_name_col      = 'sample'
+
+    // Apply a higher filter to check that the filtering works
+    filtering_min_abundance=10
+
+    // Exploratory
+    exploratory_main_variable      = 'contrasts'
+}
diff --git a/nextflow.config b/nextflow.config
@@ -23,7 +23,7 @@ params {
 
     // Experimental analysis options
     tools   = null
-    pathway = null
+    pathway = 'diff_prop'
 
     // Reporting
     logo_file               = "$projectDir/docs/images/nf-core-differentialabundance_logo_light.png"
@@ -356,6 +356,7 @@ profiles {
     test_affy { includeConfig 'conf/test_affy.config' }
     test_maxquant { includeConfig 'conf/test_maxquant.config' }
     test_soft {includeConfig 'conf/test_soft.config' }
+    test_experimental {includeConfig 'conf/test_experimental.config' }
 }
 
 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -79,6 +79,7 @@
                 },
                 "pathway": {
                     "type": "string",
+                    "default": "diff_prop",
                     "fa_icon": "fas fa-border-all",
                     "description": "(experimental only): Choose a pathway (or a comma-separated list of pathways) from those predefined in the tool sheet",
                     "help_text": "Choose a subset of pathways to run. Pathways are defined in the tool sheet."

diff --git a/subworkflows/local/differential/main.nf b/subworkflows/local/differential/main.nf
@@ -1,33 +1,59 @@
 //
 // Perform differential analysis
 //
-include {PROPR_PROPD as PROPD} from "../../../modules/nf-core/propr/propd/main.nf"
+include { PROPR_PROPD as PROPD } from "../../../modules/nf-core/propr/propd/main.nf"
+include { DESEQ2_DIFFERENTIAL  } from '../../../modules/nf-core/deseq2/differential/main'
+
 
 workflow DIFFERENTIAL {
     take:
+    ch_contrasts    // [meta, contrast_variable, reference, target]
+    ch_samplesheet  // joined onto ch_counts in main; presumably [meta, samplesheet] - TODO confirm
     ch_counts
     ch_tools
-    ch_samplesheet
 
     main:
     ch_counts
+        .join(ch_samplesheet)
+        .first() // keep only the first joined item
         .combine(ch_tools)
+        .combine(ch_contrasts)
         .map {
-            metacounts, counts, meta ->
-                [ metacounts+meta, counts ]
+            meta_counts, counts, samplesheet, tools, meta_contrast, contrast_variable, reference, target ->
+                def meta = meta_counts.clone() + tools.clone() // merge the counts meta with the tool settings
+                meta.args_diff = (meta.args_diff ?: "") + " --group_col $contrast_variable" // append the contrast variable as --group_col
+                [ meta, samplesheet, counts ]
         }
+        .unique() // reference/target are not kept above, so contrasts sharing a variable yield identical tuples
         .branch {
             propd:  it[0]["diff_method"] == "propd"
+                return [it[0], it[2]] // propd branch emits [meta, counts] only
             deseq2: it[0]["diff_method"] == "deseq2"
         }
         .set { ch_counts_tools }
 
-    PROPD(ch_counts_tools.propd, ch_samplesheet)
-    ch_results = PROPD.out.results
+    PROPD(
+        ch_counts_tools.propd,
+        ch_samplesheet.first() // first samplesheet item only
+    )
+    ch_results   = PROPD.out.results
     ch_adjacency = PROPD.out.adj
 
+    // ToDo: In order to use deseq2 the downstream processes need to be updated to process the output correctly
+    // if (params.transcript_length_matrix) { ch_transcript_lengths = Channel.of([ exp_meta, file(params.transcript_length_matrix, checkIfExists: true)]).first() } else { ch_transcript_lengths = [[],[]] }
+    // if (params.control_features) { ch_control_features = Channel.of([ exp_meta, file(params.control_features, checkIfExists: true)]).first() } else { ch_control_features = [[],[]] }
+
+    // DESEQ2_DIFFERENTIAL (
+    //     ch_contrasts,
+    //     ch_counts_tools.deseq2,
+    //     ch_control_features,
+    //     ch_transcript_lengths
+    // )
+    // ch_results = ch_results
+    //     .mix(DESEQ2_DIFFERENTIAL.out.results)
+
     emit:
-    results = ch_results
+    results   = ch_results
     adjacency = ch_adjacency
 
 }
diff --git a/subworkflows/local/experimental/main.nf b/subworkflows/local/experimental/main.nf
@@ -9,58 +9,50 @@ include { ENRICHMENT }          from '../enrichment/main.nf'
 
 workflow EXPERIMENTAL {
     take:
-    ch_samples_and_matrix // [meta, samplesheet, matrix] que viene de differentialabundance
+    ch_contrasts   // passed through to DIFFERENTIAL
+    ch_samplesheet // passed through to DIFFERENTIAL
+    ch_counts      // consumed by all four sub-workflows below
     ch_tools
 
 
     main:
-    // Split the ch_samples_and_matrix into one channel for the samplesheet and another for the matrix (PROPD takes them separately).
-    ch_samples_and_matrix
-    .map {
-        meta, samplesheet, counts ->
-            [ meta, samplesheet ]
-    }
-    .set { ch_samplesheet }
-
-    ch_samples_and_matrix
-        .map {
-            meta, samplesheet, counts ->
-                [ meta, counts ]
-        }
-        .set { ch_counts }
-
-    ch_counts
-        .combine(ch_tools)
-        .map {
-            metacounts, counts, metatools ->
-                [ metacounts+metatools, counts ]
-        }
-        .set { ch_counts_tools }
-
-    // Perform CODA analysis
-    ch_out = Channel.empty()
-
     // Perform differential analysis
-    DIFFERENTIAL(ch_counts, ch_tools, ch_samplesheet.collect())
+    DIFFERENTIAL(
+        ch_contrasts,
+        ch_samplesheet,
+        ch_counts,
+        ch_tools
+    )
     ch_diff_results = DIFFERENTIAL.out.results
     ch_diff_adjacency = DIFFERENTIAL.out.adjacency
 
     // Perform variable selection
     ch_counts_filtered = VARIABLE_SELECTION(ch_diff_adjacency, ch_counts)
 
     // Perform correlation analysis
-    CORRELATION(ch_counts, ch_tools, ch_counts_filtered)
+    CORRELATION(
+        ch_counts,
+        ch_tools,
+        ch_counts_filtered
+    )
     ch_matrix = CORRELATION.out.matrix
     ch_cor_adjacency = CORRELATION.out.adjacency
-    ch_out.mix(ch_matrix)
 
     // Perform enrichment analysis
-    ENRICHMENT(ch_diff_adjacency, ch_cor_adjacency, ch_counts)
+    ENRICHMENT(
+        ch_diff_adjacency,
+        ch_cor_adjacency,
+        ch_counts
+    )
     ch_enriched_cor = ENRICHMENT.out.enriched_cor
     ch_enriched_diff = ENRICHMENT.out.enriched_diff
 
-    ch_out.mix(ch_enriched_diff, ch_enriched_cor)
-
     emit:
-    output = ch_out
+    diff_res    = ch_diff_results
+    diff_adj    = ch_diff_adjacency
+    var_count   = ch_counts_filtered
+    corr_matrix = ch_matrix
+    corr_adj    = ch_cor_adjacency
+    enriched_cor    = ch_enriched_cor
+    enriched_diff   = ch_enriched_diff
 }
diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf
@@ -360,27 +360,26 @@ workflow DIFFERENTIALABUNDANCE {
             .first()
     } else if (params.study_type == 'experimental') {
 
-        ch_samples_and_matrix = ch_input.combine(ch_in_raw.map{it[1]})
-        ch_samples_and_matrix.view()
-
         // Convert the samplesheet.csv in a channel with the proper format
         ch_tools = Channel.fromSamplesheet('tools')
 
-        // TO DO: This should be modified to run one path per default, not all
         if (params.pathway == "all") {
             ch_tools
                 .set{ ch_tools_single }
         } else {
             ch_tools
                 .filter{
-                    it[0]["pathway_name"] == params.pathway // TO DO: change pathway to path also in the tools_samplesheet file
+                    it[0]["pathway_name"] in params.pathway.tokenize(',')
                 }
                 .set{ ch_tools_single }
         }
-        ch_tools_single.view()
 
-        EXPERIMENTAL(ch_samples_and_matrix, ch_tools_single)
-        EXPERIMENTAL.out.output.view()
+        EXPERIMENTAL(
+            ch_contrasts,
+            VALIDATOR.out.sample_meta,
+            CUSTOM_MATRIXFILTER.out.filtered,
+            ch_tools_single
+        )
 
         ch_norm = Channel.empty()
         ch_differential = Channel.empty()