Merge pull request #257 from WackerO/prerelease_fixes

Some fixes before the upcoming release
nf-core · Apr 30, 2024 · fe03ce6 · fe03ce6
2 parents c2d7ed0 + d9fd29a
commit fe03ce6
Show file tree

Hide file tree

Showing 11 changed files with 43 additions and 25 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### `Added`
 
 - [[#259](https://github.com/nf-core/differentialabundance/pull/259)] - Bump gtf2featureannotation to fix GTF handling error ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO))
+- [[#257](https://github.com/nf-core/differentialabundance/pull/257)] - Added maxquant profile to nextflow.config to make it available ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
 - [[#254](https://github.com/nf-core/differentialabundance/pull/254)] - Some parameter changes, added qbic credits ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
 - [[#250](https://github.com/nf-core/differentialabundance/pull/250)] - Template update for nf-core/tools v2.13.1 ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
 - [[#244](https://github.com/nf-core/differentialabundance/pull/244)] - Add pipeline params for matrixfilter NA options ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
@@ -19,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Fixed`
 
+- [[#257](https://github.com/nf-core/differentialabundance/pull/257)] - Fixed FILTER_DIFFTABLE module, updated PROTEUS module to better handle whitespace in prefix param, made docs clearer ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
 - [[#254](https://github.com/nf-core/differentialabundance/pull/254)] - Made differential_file_suffix optional ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
 - [[#240](https://github.com/nf-core/differentialabundance/pull/240)] - Publish GSEA reports ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO))
 - [[#231](https://github.com/nf-core/differentialabundance/pull/231)] - Update GSEA module to fix butterfly plot bug ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))

diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd
@@ -267,8 +267,9 @@ for (r in seq_along(contributors)) {
 
 ```{r, echo=FALSE}
 observations <- read_metadata(file.path(params$input_dir, params$observations), id_col = params$observations_id_col)
-if (! params$observations_name_col %in% colnames(observations)){
-    stop(paste('Invalid observation name column specified: ', params$observations_name_col, paste0('(Valid values are: ', paste(colnames(observations), collapse=', '),')')))
+observations_name_col <- ifelse(!is.null(params$observations_name_col), params$observations_name_col, params$observations_id_col)
+if (! observations_name_col %in% colnames(observations)){
+    stop(paste('Invalid observation name column specified: ', observations_name_col, paste0('(Valid values are: ', paste(colnames(observations), collapse=', '),')')))
 }
 
 if (! is.null(params$features)){
@@ -305,7 +306,7 @@ assay_data <- lapply(assay_files, function(x) {
         row.names = 1
         )
     )
-    colnames(mat) <- observations[[params$observations_name_col]][match(colnames(mat), rownames(observations))]
+    colnames(mat) <- observations[[observations_name_col]][match(colnames(mat), rownames(observations))]
     mat
 })
 
@@ -316,7 +317,7 @@ if (!is.null(params$features_log2_assays)) {
 assay_data <- cond_log2_transform_assays(assay_data, params$features_log2_assays)
 
 # Now we can rename the observations rows using the title field
-rownames(observations) <- observations[[params$observations_name_col]]
+rownames(observations) <- observations[[observations_name_col]]
 
 # Run PCA early so we can understand how important each variable is
 pca_datas <- lapply(names(assay_data), function(assay_type){
@@ -547,7 +548,7 @@ Whiskers in the above boxplots show `r params$exploratory_whisker_distance` time
 plotly_densityplot(
     assay_data,
     experiment = observations,
-    colorby = params$observations_name_col,
+    colorby = observations_name_col,
     expressiontype = paste("count per", params$features_type),
     makeColorScale(length(unique(observations[[params$observations_id_col]])), palette = "Set1")
 )

diff --git a/conf/maxquant.config b/conf/maxquant.config
@@ -38,7 +38,7 @@ params {
     differential_feature_name_column = "Majority protein IDs"
 
     // Proteus options
-    proteus_measurecol_prefix = 'LFQ intensity '
+    proteus_measurecol_prefix = 'LFQ intensity'
 
     // Shiny does not work for this datatype
     shinyngs_build_app               = false

diff --git a/docs/usage.md b/docs/usage.md
@@ -23,7 +23,11 @@ With the above in mind, running this workflow requires:
 --input '[path to samplesheet file]'
 ```
 
-This may well be the same sample sheet used to generate the input matrix. For example, in RNA-seq this might be the same sample sheet, perhaps derived from [fetchngs](https://github.com/nf-core/fetchngs), that was input to the [RNA-seq workflow](https://github.com/nf-core/rnaseq). It may be necessary to add columns that describe the groups you want to compare.
+This may well be the same sample sheet used to generate the input matrix. For example, in RNA-seq this might be the same sample sheet, perhaps derived from [fetchngs](https://github.com/nf-core/fetchngs), that was input to the [RNA-seq workflow](https://github.com/nf-core/rnaseq). It may be necessary to add columns that describe the groups you want to compare. The columns that the pipeline requires are:
+
+- a column listing the sample IDs (these must be the same IDs as in the abundance matrix); in the example below it is called 'sample'. For some study_types, this column might need to be filled in with file names, e.g. when doing an affymetrix analysis.
+- one or more columns describing conditions for the differential analysis; in the example below it is called 'condition'.
+- optionally, one or more columns describing sample batches or similar which you want to be considered in the analysis; in the example below it is called 'batch'.
 
 For example:
 
@@ -96,7 +100,7 @@ So we **do not recommend** raw counts files such as `salmon.merged.gene_counts.t
 --matrix '[path to matrix file]'
 ```
 
-This is the proteinGroups.txt file produced by MaxQuant. It is a tab-separated matrix file with a column for every observation (plus additional columns for other types of measurements and information); each row contains these data for a set of proteins. The parameters `--observations_id_col` and `--features_id_col` define which of the associated fields should be matched in those inputs. The parameter `--proteus_measurecol_prefix` defines which prefix is used to extract those matrix columns which contain the measurements to be used. For example, the default `LFQ intensity ` will indicate that columns like LFQ intensity S1, LFQ intensity S2, LFQ intensity S3 etc. are used (do not forget trailing whitespace in this parameter, if required!).
+This is the proteinGroups.txt file produced by MaxQuant. It is a tab-separated matrix file with a column for every observation (plus additional columns for other types of measurements and information); each row contains these data for a set of proteins. The parameters `--observations_id_col` and `--features_id_col` define which of the associated fields should be matched in those inputs. The parameter `--proteus_measurecol_prefix` defines which prefix is used to extract those matrix columns which contain the measurements to be used. For example, the default `LFQ intensity` will indicate that columns like LFQ intensity S1, LFQ intensity S2, LFQ intensity S3 etc. are used (one whitespace is automatically added if necessary).
 
 ### Affymetrix microarrays
 

diff --git a/modules.json b/modules.json
@@ -62,7 +62,7 @@
                     },
                     "proteus/readproteingroups": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
+                        "git_sha": "a069b29783583c219c1f23ed3dcf64a5aee1340b",
                         "installed_by": ["modules"]
                     },
                     "rmarkdownnotebook": {

diff --git a/modules/nf-core/proteus/readproteingroups/environment.yml b/modules/nf-core/proteus/readproteingroups/environment.yml
diff --git a/modules/nf-core/proteus/readproteingroups/main.nf b/modules/nf-core/proteus/readproteingroups/main.nf
diff --git a/modules/nf-core/proteus/readproteingroups/meta.yml b/modules/nf-core/proteus/readproteingroups/meta.yml
diff --git a/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R b/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R
diff --git a/nextflow.config b/nextflow.config
@@ -63,7 +63,7 @@ params {
     affy_build_annotation    = true
 
     // Proteus-specific options
-    proteus_measurecol_prefix = 'LFQ intensity '
+    proteus_measurecol_prefix = 'LFQ intensity'
     proteus_norm_function     = 'normalizeMedian'
     proteus_plotsd_method     = 'violin'
     proteus_plotmv_loess      =  true
@@ -342,6 +342,7 @@ profiles {
     test_nogtf { includeConfig 'conf/test_nogtf.config' }
     test_full { includeConfig 'conf/test_full.config' }
     affy { includeConfig 'conf/affy.config' }
+    maxquant { includeConfig 'conf/maxquant.config' }
     rnaseq { includeConfig 'conf/rnaseq.config' }
     soft {includeConfig 'conf/soft.config'}
     test_affy { includeConfig 'conf/test_affy.config' }

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -268,8 +268,9 @@
             "properties": {
                 "proteus_measurecol_prefix": {
                     "type": "string",
-                    "default": "LFQ intensity ",
-                    "description": "Prefix of the column names of the MaxQuant proteingroups table in which the intensity values are saved; the prefix has to be followed by the sample names that are also found in the samplesheet. Default: 'LFQ intensity '; take care to also consider trailing whitespace between prefix and samplenames."
+                    "default": "LFQ intensity",
+                    "description": "Prefix of the column names of the MaxQuant proteingroups table in which the intensity values are saved; the prefix has to be followed by the sample names that are also found in the samplesheet. Default: 'LFQ intensity'; will search for both the prefix as entered and the prefix followed by one whitespace.",
+                    "help_text": "If the sample columns are e.g. called 'LFQ intensity sample1', 'LFQ intensity sample2' etc., please set this parameter to 'LFQ intensity'."
                 },
                 "proteus_norm_function": {
                     "type": "string",
@@ -1010,6 +1011,7 @@
             "properties": {
                 "report_file": {
                     "type": "string",
+                    "default": "${projectDir}/assets/differentialabundance_report.Rmd",
                     "description": "Rmd report template from which to create the pipeline report",
                     "help_text": "The pipeline will always generate a default report which gives a good overview of the analysis results. Should this default report not suit your needs, you can provide the path to a custom report instead.",
                     "format": "file-path",
@@ -1025,19 +1027,19 @@
                 },
                 "logo_file": {
                     "type": "string",
-                    "default": "docs/images/nf-core-differentialabundance_logo_light.png",
+                    "default": "${projectDir}/docs/images/nf-core-differentialabundance_logo_light.png",
                     "description": "A logo to display in the report instead of the generic pipeline logo",
                     "fa_icon": "far fa-font-awesome-logo-full"
                 },
                 "css_file": {
                     "type": "string",
-                    "default": "assets/nf-core_style.css",
+                    "default": "${projectDir}/assets/nf-core_style.css",
                     "description": "CSS to use to style the output, in lieu of the default nf-core styling",
                     "fa_icon": "far fa-file-code"
                 },
                 "citations_file": {
                     "type": "string",
-                    "default": "CITATIONS.md",
+                    "default": "${projectDir}/CITATIONS.md",
                     "description": "A markdown file containing citations to include in the final report",
                     "fa_icon": "fas fa-ad"
                 },