diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml
index f24cc9ff..42badad9 100644
--- a/.github/workflows/download_pipeline.yml
+++ b/.github/workflows/download_pipeline.yml
@@ -69,11 +69,3 @@ jobs:
- name: Inspect download
run: tree ./${{ env.REPOTITLE_LOWERCASE }}
-
- - name: Run the downloaded pipeline (stub)
- id: stub_run_pipeline
- continue-on-error: true
- env:
- NXF_SINGULARITY_CACHEDIR: ./
- NXF_SINGULARITY_HOME_MOUNT: true
- run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6854689c..dddb1aa8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,76 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## 2.2.0 - Dogmatix [2024-09-13]
+
+### `Added`
+
+- A new parameter `mt_aligner` to control which aligner is used to align reads to mitochondria [#600](https://github.com/nf-core/raredisease/pull/600)
+- A new parameter `par_bed` to pass a PAR bed file to deepvariant [#598](https://github.com/nf-core/raredisease/pull/598)
+- A new functionality to pass gzipped resources to vcfanno_extra_resources [#589](https://github.com/nf-core/raredisease/pull/589)
+- A new parameter `vcfanno_extra_resources` to pass an extra resource to vcfanno [#588](https://github.com/nf-core/raredisease/pull/588)
+- A new parameter `scatter_count` to control how many interval files are created from a genome (used to parallelize annotations) [#585](https://github.com/nf-core/raredisease/pull/585)
+- Print warning messages if user intends to perform ranking when there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579)
+- Two new parameters `skip_repeat_annotation` and `skip_repeat_calling` to skip calling and annotation of repeat expansions [#574](https://github.com/nf-core/raredisease/pull/574)
+- A new parameter `skip_smncopynumbercaller` to skip smncopynumbercaller module [#574](https://github.com/nf-core/raredisease/pull/574)
+- A new parameter `skip_sv_calling` to skip sv calling workflow [#572](https://github.com/nf-core/raredisease/pull/572)
+- Two new parameters `skip_snv_calling` and `skip_repeat_analysis` to skip snv calling and repeat analysis respectively [#571](https://github.com/nf-core/raredisease/pull/571)
+- Two new parameters `mbuffer_mem` and `samtools_sort_threads` to control resources given to mbuffer and samtools sort in the bwameme module [#570](https://github.com/nf-core/raredisease/pull/570)
+
+### `Changed`
+
+- Update default vep container from v110 to v112 [#609](https://github.com/nf-core/raredisease/pull/609)
+- Default index for vcfanno extra annotation files from tbi to csi [#606](https://github.com/nf-core/raredisease/pull/606)
+- Updated the model for Sentieon DNAScope to v1.1 [#601](https://github.com/nf-core/raredisease/pull/601)
+- bwameme can no longer be used to align mitochondrial reads [#600](https://github.com/nf-core/raredisease/pull/600)
+- Males' X and Y chromosomes will be treated as haploids during variant calling by deepvariant [#598](https://github.com/nf-core/raredisease/pull/598)
+- Acceptable type for lane field in the samplesheet from number to string [#597](https://github.com/nf-core/raredisease/pull/597)
+- Allow `0` as a valid value for `sex` in the samplesheet [#595](https://github.com/nf-core/raredisease/pull/595)
+- Updated deepvariant to version 1.6.1 [#587](https://github.com/nf-core/raredisease/pull/587)
+- Parallelized vcfanno [#585](https://github.com/nf-core/raredisease/pull/585)
+- Skip ROH calling with bcftools if there are no affected samples [#579](https://github.com/nf-core/raredisease/pull/579)
+- Refactored tool citation list [#577](https://github.com/nf-core/raredisease/pull/577)
+- Removed `skip_repeat_analysis` added in #571 [#574](https://github.com/nf-core/raredisease/pull/574)
+- Remove several skip parameters that had been included in the pipeline to avoid failed CI tests (see parameters table below) [#574](https://github.com/nf-core/raredisease/pull/574)
+- `readcount_intervals` parameter is now mandatory for running germlinecnvcaller. [#570](https://github.com/nf-core/raredisease/pull/570)
+- Turn off CNVnator, TIDDIT, SMNCopyNumberCaller, Gens, and Vcf2cytosure for targeted analysis [#573](https://github.com/nf-core/raredisease/pull/573)
+
+### `Fixed`
+
+- Issues that cropped up when `aligner` and `mt_aligner` were different [#605](https://github.com/nf-core/raredisease/pull/605)
+- Update docs to show 'vep_plugin_files' as a mandatory parameter for SNV annotation [#594](https://github.com/nf-core/raredisease/pull/594)
+- Error in SVDB merge when only a single SV caller is run [#586](https://github.com/nf-core/raredisease/pull/586)
+- Errors due to misplaced version statements [#578](https://github.com/nf-core/raredisease/pull/578)
+- Stub crashes due to peddy reported in [#566](https://github.com/nf-core/raredisease/issues/566) [#576](https://github.com/nf-core/raredisease/pull/576)
+- Docker manifest error from gnu-wget container [#570](https://github.com/nf-core/raredisease/pull/570)
+- Citations for bwameme [#563](https://github.com/nf-core/raredisease/pull/563)
+
+### Parameters
+
+| Old parameter | New parameter |
+| --------------- | ------------------------ |
+| | mbuffer_mem |
+| | mt_aligner |
+| | samtools_sort_threads |
+| | skip_repeat_calling |
+| | skip_snv_calling |
+| | skip_sv_calling |
+| skip_eklipse | |
+| skip_fastqc | |
+| skip_haplocheck | |
+| skip_qualimap | |
+| | skip_smncopynumbercaller |
+| | skip_repeat_annotation |
+| | scatter_count |
+| | vcfanno_extra_resources |
+
+### Tool updates
+
+| Tool | Old version | New version |
+| ----------- | ----------- | ----------- |
+| Deepvariant | 1.5.0 | 1.6.1 |
+| ensemblvep | 110 | 112 |
+
## 2.1.0 - Obelix [2024-05-29]
### `Added`
diff --git a/CITATIONS.md b/CITATIONS.md
index 36b3cd7b..1db771ac 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -22,6 +22,10 @@
> Vasimuddin Md, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE; 2019:314-324. doi:10.1109/IPDPS.2019.00041
+- [BWA-MEME](https://academic.oup.com/bioinformatics/article/38/9/2404/6543607)
+
+ > Jung Y, Han D. BWA-MEME: BWA-MEM emulated with a machine learning approach. Bioinformatics. 2022;38(9):2404-2413. doi:10.1093/bioinformatics/btac137
+
- [CADD1](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-021-00835-9), [2](https://academic.oup.com/nar/article/47/D1/D886/5146191)
> Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 02488444..964d29bf 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -3,9 +3,9 @@ custom_logo_url: https://github.com/nf-core/raredisease/
custom_logo_title: "nf-core/raredisease"
report_comment: >
- This report has been generated by the nf-core/raredisease
+ This report has been generated by the nf-core/raredisease
analysis pipeline. For information about how to interpret these results, please see the
- documentation.
+ documentation.
report_section_order:
"nf-core-raredisease-methods-description":
order: -1000
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 38249daa..3b4703e4 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -15,8 +15,9 @@
"errorMessage": "Sample name must be provided and cannot contain spaces"
},
"lane": {
- "type": "number",
- "meta": ["lane"]
+ "type": "string",
+ "meta": ["lane"],
+ "pattern": "^\\S+$"
},
"fastq_1": {
"type": "string",
@@ -42,7 +43,7 @@
"sex": {
"type": "string",
"meta": ["sex"],
- "enum": ["1", "2", "other"],
+ "enum": ["1", "2", "0", "other"],
"errorMessage": "Sex must be provided and cannot contain spaces"
},
"phenotype": {
diff --git a/conf/modules/align_MT.config b/conf/modules/align_MT.config
index a616272b..3fdbdbaa 100644
--- a/conf/modules/align_MT.config
+++ b/conf/modules/align_MT.config
@@ -23,12 +23,6 @@ process {
ext.prefix = { "${meta.id}_sorted" }
}
- withName: '.*ALIGN_MT:BWAMEME_MEM_MT' {
- ext.args = { "-7 -M -K 100000000 -R ${meta.read_group}" }
- ext.args2 = { "-T ./samtools_sort_tmp" }
- ext.prefix = { "${meta.id}_sorted" }
- }
-
withName: '.*ALIGN_MT:BWA_MEM_MT' {
ext.args = { "-M -K 100000000 -R ${meta.read_group}" }
ext.args2 = { "-T ./samtools_sort_tmp" }
@@ -80,12 +74,6 @@ process {
ext.prefix = { "${meta.id}_sorted_shifted" }
}
- withName: '.*ALIGN_MT_SHIFT:BWAMEME_MEM_MT' {
- ext.args = { "-7 -M -K 100000000 -R ${meta.read_group}" }
- ext.args2 = { "-T ./samtools_sort_tmp" }
- ext.prefix = { "${meta.id}_sorted_shifted" }
- }
-
withName: '.*ALIGN_MT_SHIFT:SENTIEON_BWAMEM_MT' {
ext.args = { "-M -K 10000000 -R ${meta.read_group}" }
ext.args2 = { "-T ./samtools_sort_tmp" }
diff --git a/conf/modules/annotate_genome_snvs.config b/conf/modules/annotate_genome_snvs.config
index 60bcd259..fe8d807f 100644
--- a/conf/modules/annotate_genome_snvs.config
+++ b/conf/modules/annotate_genome_snvs.config
@@ -18,6 +18,7 @@
process {
withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_ROH' {
+ ext.when = { meta.probands.size()>0 }
ext.args = { "--samples ${meta.probands.unique().first()} --skip-indels " }
ext.prefix = { "${meta.id}_roh" }
}
@@ -30,13 +31,18 @@ process {
ext.prefix = { "${input.simpleName}" }
}
+ withName: '.*ANNOTATE_GENOME_SNVS:GATK4_SELECTVARIANTS' {
+ ext.prefix = { "${vcf.simpleName}_${intervals.simpleName}" }
+ }
+
withName: '.*ANNOTATE_GENOME_SNVS:VCFANNO' {
- ext.prefix = { "${vcf.simpleName}_vcfanno" }
+ ext.prefix = { "${meta.prefix}_vcfanno_${meta.scatterid}" }
}
withName: '.*ANNOTATE_GENOME_SNVS:UPD_SITES' {
ext.prefix = { "${vcf.simpleName}_upd_sites" }
ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} sites"}
+ ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun }
}
withName: '.*ANNOTATE_GENOME_SNVS:UPD_REGIONS' {
@@ -73,14 +79,10 @@ process {
}
withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_VIEW' {
- ext.prefix = { "${vcf.simpleName}_filter" }
+ ext.prefix = { "${meta.prefix}_filter_${meta.scatterid}" }
ext.args = { '--output-type z --exclude "INFO/GNOMADAF > 0.70 | INFO/GNOMADAF_popmax > 0.70" ' }
}
- withName: '.*ANNOTATE_GENOME_SNVS:GATK4_SELECTVARIANTS' {
- ext.prefix = { "${vcf.simpleName}_${intervals.simpleName}" }
- }
-
withName: '.*ANNOTATE_GENOME_SNVS:ENSEMBLVEP_SNV' {
ext.prefix = { "${meta.prefix}_vep_${meta.scatterid}" }
ext.args = { [
diff --git a/conf/modules/annotate_repeat_expansions.config b/conf/modules/annotate_repeat_expansions.config
new file mode 100644
index 00000000..c1d934ea
--- /dev/null
+++ b/conf/modules/annotate_repeat_expansions.config
@@ -0,0 +1,41 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// Repeat expansion annotation options
+//
+
+process {
+
+ withName: '.*ANNOTATE_REPEAT_EXPANSIONS:STRANGER' {
+ ext.prefix = { "${meta.id}_stranger" }
+ }
+
+ withName: '.*ANNOTATE_REPEAT_EXPANSIONS:COMPRESS_STRANGER' {
+ ext.prefix = { "${meta.id}_repeat_expansion_stranger" }
+ ext.args = '--output-type z'
+ publishDir = [
+ path: { "${params.outdir}/repeat_expansions" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ ]
+ }
+
+ withName: '.*ANNOTATE_REPEAT_EXPANSIONS:INDEX_STRANGER' {
+ publishDir = [
+ path: { "${params.outdir}/repeat_expansions" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ ]
+ }
+}
diff --git a/conf/modules/call_repeat_expansions.config b/conf/modules/call_repeat_expansions.config
index a3c140a0..fd6738ae 100644
--- a/conf/modules/call_repeat_expansions.config
+++ b/conf/modules/call_repeat_expansions.config
@@ -58,25 +58,4 @@ process {
ext.args = {"--notag"}
}
- withName: '.*CALL_REPEAT_EXPANSIONS:STRANGER' {
- ext.prefix = { "${meta.id}_stranger" }
- }
-
- withName: '.*CALL_REPEAT_EXPANSIONS:COMPRESS_STRANGER' {
- ext.prefix = { "${meta.id}_repeat_expansion_stranger" }
- ext.args = '--output-type z'
- publishDir = [
- path: { "${params.outdir}/repeat_expansions" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
- ]
- }
-
- withName: '.*CALL_REPEAT_EXPANSIONS:INDEX_STRANGER' {
- publishDir = [
- path: { "${params.outdir}/repeat_expansions" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
- ]
- }
}
diff --git a/conf/modules/call_snv_deepvariant.config b/conf/modules/call_snv_deepvariant.config
index 02c2b384..348e95cc 100644
--- a/conf/modules/call_snv_deepvariant.config
+++ b/conf/modules/call_snv_deepvariant.config
@@ -22,7 +22,10 @@ process {
}
withName: '.*CALL_SNV_DEEPVARIANT:DEEPVARIANT' {
- ext.args = { "--model_type=${params.analysis_type.toUpperCase()}" }
+ ext.args = { [
+ "--model_type=${params.analysis_type.toUpperCase()}",
+ meta.sex == "1" ? params.genome == 'GRCh37' ? '--haploid_contigs="X,Y"' : '--haploid_contigs="chrX,chrY"' : ''
+ ].join(' ') }
ext.prefix = { "${meta.id}_deepvar" }
}
diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config
index 9ec4d47b..025172fe 100644
--- a/conf/modules/prepare_references.config
+++ b/conf/modules/prepare_references.config
@@ -26,7 +26,7 @@ process {
}
withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_GENOME' {
- ext.when = {!params.bwamem2 && params.aligner == "bwamem2"}
+ ext.when = {!params.bwamem2 && (params.aligner == "bwamem2" || params.mt_aligner == "bwamem2")}
}
withName: '.*PREPARE_REFERENCES:BWAMEME_INDEX_GENOME' {
@@ -34,29 +34,24 @@ process {
ext.when = {!params.bwameme && params.aligner == "bwameme"}
}
- withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' {
- ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.aligner == "bwamem2"}
+ withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' {
+ ext.when = {!params.bwa && (!(params.aligner == "sentieon") || params.mt_aligner == "bwa")}
}
- withName: '.*PREPARE_REFERENCES:BWAMEME_INDEX_MT_SHIFT' {
- ext.args = '-a meme'
- ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwameme"}
+ withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' {
+ ext.when = {!params.bwa && (params.aligner == "sentieon" || params.mt_aligner == "sentieon")}
}
- withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' {
- ext.when = {!params.bwa && params.aligner == "sentieon"}
+ withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' {
+ ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"}
}
withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' {
- ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.aligner == "sentieon"}
+ ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"}
}
withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT_SHIFT' {
- ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.aligner == "bwa"}
- }
-
- withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' {
- ext.when = {!params.bwa && (!(params.aligner == "sentieon") || params.aligner == "bwa")}
+ ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwa"}
}
withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' {
@@ -105,6 +100,10 @@ process {
ext.when = { !params.target_bed.equals(null) && !params.target_bed.endsWith(".gz") }
}
+ withName: '.*PREPARE_REFERENCES:TABIX_BGZIPINDEX_VCFANNOEXTRA' {
+ ext.args2 = '--csi'
+ }
+
withName: '.*PREPARE_REFERENCES:GATK_BILT' {
ext.when = { !params.target_bed.equals(null) }
ext.prefix = { "${meta.id}_target" }
@@ -124,19 +123,4 @@ process {
ext.when = { (params.vep_cache && params.vep_cache.endsWith("tar.gz")) }
}
- withName: '.*PREPARE_REFERENCES:GATK_PREPROCESS_WGS' {
- ext.args = { [
- '--padding 0',
- '--interval-merging-rule OVERLAPPING_ONLY',
- "--exclude-intervals ${params.mito_name}",
- "--tmp-dir ./"
- ].join(' ') }
- ext.when = { params.analysis_type.equals("wgs") && !params.readcount_intervals }
- }
-
- withName: '.*PREPARE_REFERENCES:GATK_PREPROCESS_WES' {
- ext.args = { "--bin-length 0 --interval-merging-rule OVERLAPPING_ONLY --exclude-intervals ${params.mito_name}" }
- ext.when = { params.analysis_type.equals("wes") && !params.readcount_intervals }
- }
-
}
diff --git a/conf/modules/qc_bam.config b/conf/modules/qc_bam.config
index 29cd3900..a2a17364 100644
--- a/conf/modules/qc_bam.config
+++ b/conf/modules/qc_bam.config
@@ -35,10 +35,8 @@ process {
ext.prefix = { "${meta.id}_hsmetrics" }
}
- if (!params.skip_qualimap) {
- withName: '.*QC_BAM:QUALIMAP_BAMQC' {
- ext.prefix = { "${meta.id}_qualimap" }
- }
+ withName: '.*QC_BAM:QUALIMAP_BAMQC' {
+ ext.prefix = { "${meta.id}_qualimap" }
}
withName: '.*QC_BAM:TIDDIT_COV' {
diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config
index 4499800d..5298800d 100644
--- a/conf/modules/raredisease.config
+++ b/conf/modules/raredisease.config
@@ -21,6 +21,7 @@ process {
ext.args = '--quiet --dir ./'
publishDir = [
path: { "${params.outdir}/fastqc/${meta.id}" },
+ mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}
@@ -67,6 +68,7 @@ process {
withName: '.*RAREDISEASE:CREATE_PEDIGREE_FILE' {
publishDir = [
path: { "${params.outdir}/pedigree" },
+ mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}
diff --git a/conf/modules/scatter_genome.config b/conf/modules/scatter_genome.config
index b2fe363a..07c3602e 100644
--- a/conf/modules/scatter_genome.config
+++ b/conf/modules/scatter_genome.config
@@ -21,7 +21,7 @@ process {
}
withName: '.*SCATTER_GENOME:GATK4_SPLITINTERVALS' {
- ext.args = { "--subdivision-mode BALANCING_WITHOUT_INTERVAL_SUBDIVISION --scatter-count 22" }
+ ext.args = { "--scatter-count ${params.scatter_count}" }
ext.when = { !params.skip_snv_annotation }
ext.prefix = { "${meta.id}_genome_intervals" }
publishDir = [
diff --git a/conf/test.config b/conf/test.config
index 85a2e404..f0c503c9 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -24,11 +24,7 @@ params {
mito_name = 'MT'
// analysis params
- skip_eklipse = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
- skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_germlinecnvcaller = true
- skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
- skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI
skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI
skip_peddy = true
@@ -44,7 +40,7 @@ params {
intervals_wgs = params.pipelines_testdata_base_path + 'raredisease/reference/target_wgs.interval_list'
intervals_y = params.pipelines_testdata_base_path + 'raredisease/reference/targetY.interval_list'
known_dbsnp = params.pipelines_testdata_base_path + 'raredisease/reference/dbsnp_-138-.vcf.gz'
- ml_model = 'https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model'
+ ml_model = 'https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.1.model'
mobile_element_references = params.pipelines_testdata_base_path + 'raredisease/reference/mobile_element_references.tsv'
mobile_element_svdb_annotations = params.pipelines_testdata_base_path + 'raredisease/reference/svdb_querydb_files.csv'
reduced_penetrance = params.pipelines_testdata_base_path + 'raredisease/reference/reduced_penetrance.tsv'
diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config
index 4f641aac..1aad988b 100644
--- a/conf/test_one_sample.config
+++ b/conf/test_one_sample.config
@@ -24,11 +24,7 @@ params {
mito_name = 'MT'
// analysis params
- skip_eklipse = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
- skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_germlinecnvcaller = true
- skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
- skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI
skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI
skip_peddy = true
@@ -44,7 +40,7 @@ params {
intervals_wgs = params.pipelines_testdata_base_path + 'raredisease/reference/target_wgs.interval_list'
intervals_y = params.pipelines_testdata_base_path + 'raredisease/reference/targetY.interval_list'
known_dbsnp = params.pipelines_testdata_base_path + 'raredisease/reference/dbsnp_-138-.vcf.gz'
- ml_model = 'https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model'
+ ml_model = 'https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.1.model'
mobile_element_references = params.pipelines_testdata_base_path + 'raredisease/reference/mobile_element_references.tsv'
mobile_element_svdb_annotations = params.pipelines_testdata_base_path + 'raredisease/reference/svdb_querydb_files.csv'
reduced_penetrance = params.pipelines_testdata_base_path + 'raredisease/reference/reduced_penetrance.tsv'
diff --git a/conf/test_sentieon.config b/conf/test_sentieon.config
index b440d47d..6776eb6a 100644
--- a/conf/test_sentieon.config
+++ b/conf/test_sentieon.config
@@ -38,7 +38,7 @@ params {
intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list"
intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
- ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
+ ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.1.model"
mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
mobile_element_svdb_annotations = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv"
reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
diff --git a/docs/output.md b/docs/output.md
index 3241f5d4..d7f608b7 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -17,6 +17,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Mapping](#mapping)
- [Bwa-mem2](#bwa-mem2)
- [BWA](#bwa)
+ - [BWA-MEME](#bwa-meme)
- [Sentieon bwa mem](#sentieon-bwa-mem)
- [Duplicate marking](#duplicate-marking)
- [Picard's MarkDuplicates](#picards-markduplicates)
@@ -88,6 +89,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
[BWA](https://github.com/lh3/bwa) used to map the reads to a reference genome. The aligned reads are coordinate sorted with samtools sort. These files are treated as intermediates and are not placed in the output folder by default. It is not the default aligner, but it can be chosen by setting `--aligner` option to bwa.
+##### BWA-MEME
+
+[BWA-MEME](https://github.com/kaist-ina/BWA-MEME) is used to map the reads to a reference genome. The aligned reads are coordinate sorted with samtools sort. These files are treated as intermediates and are not placed in the output folder by default. It is not the default aligner, but it can be chosen by setting `--aligner` option to bwameme.
+
##### Sentieon bwa mem
[Sentieon's bwa mem](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/#map-reads-to-reference) is the software accelerated version of the bwa-mem algorithm. It is used to efficiently perform the alignment using BWA. Aligned reads are then coordinate sorted using Sentieon's [sort](https://support.sentieon.com/manual/usages/general/#util-syntax) utility. These files are treated as intermediates and are not placed in the output folder by default. It is not the default aligner, but it can be chosen by setting `--aligner` option to "sentieon".
@@ -96,7 +101,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
##### Picard's MarkDuplicates
-[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in false inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2 and bwa are processed by this tool. By default, alignment files are published in bam format. If you would like to store cram files instead, set `--save_mapped_as_cram` to true.
+[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in false inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2, bwameme and bwa are processed by this tool. By default, alignment files are published in bam format. If you would like to store cram files instead, set `--save_mapped_as_cram` to true.
Output files from Alignment
@@ -382,8 +387,8 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e
Output files
- `annotate_snv/genome`
- - `_rhocall_vcfanno_filter_.vcf.gz`: file containing bcftools roh, vcfanno, cadd and vep annotations.
- - `_rhocall_vcfanno_filter_.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, cadd and vep annotations.
+ - `__filter_.vcf.gz`: file containing bcftools roh, vcfanno, cadd and vep annotations.
+ - `__filter_.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, cadd and vep annotations.
diff --git a/docs/usage.md b/docs/usage.md
index 84aca20a..0304c46a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -102,17 +102,17 @@ A samplesheet is used to pass the information about the sample(s), such as the p
nf-core/raredisease will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The pedigree information in the samplesheet (sex and phenotype) should be provided as they would be for a [ped file](https://gatk.broadinstitute.org/hc/en-us/articles/360035531972-PED-Pedigree-format) (i.e. 1 for male, 2 for female, other for unknown).
-| Fields | Description |
-| ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `lane` | Used to generate separate channels during the alignment step. |
-| `fastq_1` | Absolute path to FASTQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
-| `fastq_2` | Absolute path to FASTQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
-| `sex` | Sex (1=male; 2=female; other=unknown). |
-| `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). |
-| `paternal_id` | Sample ID of the father, can be blank if the father isn't part of the analysis or for samples other than the proband. |
-| `maternal_id` | Sample ID of the mother, can be blank if the mother isn't part of the analysis or for samples other than the proband. |
-| `case_id` | Case ID, for the analysis used when generating a family VCF. |
+| Fields | Description |
+| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
+| `lane` | Used to generate separate channels during the alignment step. It is of string type, and we recommend using a combination of flowcell and lane to distinguish between different runs of the same sample. |
+| `fastq_1` | Absolute path to FASTQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
+| `fastq_2` | Absolute path to FASTQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
+| `sex` | Sex (1=male; 2=female; for unknown sex use 0 or 'other'). |
+| `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). |
+| `paternal_id` | Sample ID of the father, can be blank if the father isn't part of the analysis or for samples other than the proband. |
+| `maternal_id` | Sample ID of the mother, can be blank if the mother isn't part of the analysis or for samples other than the proband. |
+| `case_id` | Case ID, for the analysis used when generating a family VCF. |
It is also possible to include multiple runs of the same sample in a samplesheet. For example, when you have re-sequenced the same sample more than once to increase sequencing depth. In that case, the `sample` identifiers in the samplesheet have to be the same. The pipeline will align the raw read/read-pairs independently before merging the alignments belonging to the same sample. Below is an example for a trio with the proband sequenced across two lanes:
@@ -139,7 +139,7 @@ Note that the pipeline is modular in architecture. It offers you the flexibility
nf-core/raredisease consists of several tools used for various purposes. For convenience, we have grouped those tools under the following categories:
-1. Alignment (bwamem2/bwa/Sentieon BWA mem)
+1. Alignment (bwamem2/bwa/bwameme/Sentieon BWA mem)
2. QC stats from the alignment files
3. Repeat expansions (ExpansionsHunter & Stranger)
4. Variant calling - SNV (DeepVariant/Sentieon DNAscope)
@@ -162,14 +162,15 @@ The mandatory and optional parameters for each category are tabulated below.
| aligner1 | fasta_fai4 |
| fasta2 | bwamem24 |
| platform | bwa4 |
-| mito_name/mt_fasta3 | known_dbsnp5 |
+| mito_name/mt_fasta3 | bwameme4 |
+| | known_dbsnp5 |
| | known_dbsnp_tbi5 |
| | min_trimmed_length6 |
-1Default value is bwamem2. Other alternatives are bwa and sentieon (requires valid Sentieon license ).
+1Default value is bwamem2. Other alternatives are bwa, bwameme and sentieon (requires valid Sentieon license ).
2Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.
3If mito_name is provided, mt_fasta can be generated by the pipeline.
-4fasta_fai, bwa and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
+4fasta_fai, bwa, bwamem2 and bwameme, if not provided by the user, will be generated by the pipeline when necessary.
5Used only by Sentieon.
6Default value is 40. Used only by fastp.
@@ -200,10 +201,12 @@ The mandatory and optional parameters for each category are tabulated below.
| ml_model2 | known_dbsnp_tbi2 |
| analysis_type3 | call_interval2 |
| | known_dbsnp_tbi2 |
+| | par_bed4 |
1Default variant caller is DeepVariant, but you have the option to use Sentieon as well.
2These parameters are only used by Sentieon.
3Default is WGS, but you have the option to choose WES as well.
+4This parameter is only used by DeepVariant.
##### 5. Variant calling - Structural variants
@@ -214,27 +217,30 @@ The mandatory and optional parameters for each category are tabulated below.
##### 6. Copy number variant calling
-| Mandatory | Optional |
-| ------------------------------ | ------------------------------- |
-| ploidy_model1 | readcount_intervals3 |
-| gcnvcaller_model1,2 | |
+| Mandatory | Optional |
+| --------------------------------- | -------- |
+| ploidy_model1,4 | |
+| gcnvcaller_model1,2,4 | |
+| readcount_intervals3,4 | |
1 Output from steps 3 & 4 of GATK's CNV calling pipeline run in cohort mode as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants).
2 Sample file can be found [here](https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gcnvmodels.tsv) (Note the header 'models' in the sample file).
3 Output from step 1 of GATK's CNV calling pipeline as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants).
+4 All these files can be generated using the germlinecnvcaller tool option in nf-core/createpanelrefs.
##### 7. SNV annotation & Ranking
-| Mandatory | Optional |
-| ------------------------------------ | --------------------------------------------- |
-| genome1 | reduced_penetrance8 |
-| vcfanno_resources2 | vcfanno_lua |
-| vcfanno_toml3 | vep_filters/vep_filters_scout_fmt9 |
-| vep_cache_version | cadd_resources10 |
-| vep_cache4 | vep_plugin_files11 |
-| gnomad_af5 | |
-| score_config_snv6 | |
-| variant_consequences_snv7 | |
+| Mandatory | Optional |
+| ------------------------------------ | ---------------------------------------------- |
+| genome1 | reduced_penetrance9 |
+| vcfanno_resources2 | vcfanno_lua |
+| vcfanno_toml3 | vep_filters/vep_filters_scout_fmt10 |
+| vep_cache_version | cadd_resources11 |
+| vep_cache4 | |
+| gnomad_af5 | |
+| score_config_snv6 | |
+| variant_consequences_snv7 | |
+| vep_plugin_files8 | |
1Genome version is used by VEP. You have the option to choose between GRCh37 and GRCh38.
2Path to VCF files and their indices used by vcfanno. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_resources.txt).
@@ -246,10 +252,10 @@ See example cache [here](https://raw.githubusercontent.com/nf-core/test-datasets
no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/gnomad_reformated.tab.gz).
6Used by GENMOD for ranking the variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini).
7File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html).
-8Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv).
-9 This file contains a list of candidate genes (with [HGNC](https://www.genenames.org/) IDs) that is used to split the variants into canditate variants and research variants. Research variants contain all the variants, while candidate variants are a subset of research variants and are associated with candidate genes. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/hgnc.txt). Not required if --skip_vep_filter is set to true.
-10Path to a folder containing cadd annotations. Equivalent of the data/annotations/ folder described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels.
-11A CSV file that describes the files used by VEP's named and custom plugins. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vep_files.csv).
+8A CSV file that describes the files used by VEP's named and custom plugins. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vep_files.csv).
+9Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv).
+10 This file contains a list of candidate genes (with [HGNC](https://www.genenames.org/) IDs) that is used to split the variants into candidate variants and research variants. Research variants contain all the variants, while candidate variants are a subset of research variants and are associated with candidate genes. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/hgnc.txt). Not required if --skip_vep_filter is set to true.
+11Path to a folder containing cadd annotations. Equivalent of the data/annotations/ folder described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels.
:::note
We use CADD only to annotate small indels. To annotate SNVs with precomputed CADD scores, pass the file containing CADD scores as a resource to vcfanno instead. Files containing the precomputed CADD scores for SNVs can be downloaded from [here](https://cadd.gs.washington.edu/download) (download files listed under the description: "All possible SNVs of GRCh3<7/8>/hg3<7/8>")
@@ -547,4 +553,4 @@ plugins {
}
```
-This should go in your Nextflow confgiguration file, specified with `-c ` when running the pipeline.
+This should go in your Nextflow configuration file, specified with `-c ` when running the pipeline.
diff --git a/main.nf b/main.nf
index 838d75d7..1e80e917 100644
--- a/main.nf
+++ b/main.nf
@@ -39,6 +39,7 @@ params.mobile_element_references = getGenomeAttribute('mobile_element_refe
params.mobile_element_svdb_annotations = getGenomeAttribute('mobile_element_svdb_annotations')
params.ml_model = getGenomeAttribute('ml_model')
params.mt_fasta = getGenomeAttribute('mt_fasta')
+params.par_bed = getGenomeAttribute('par_bed')
params.ploidy_model = getGenomeAttribute('ploidy_model')
params.reduced_penetrance = getGenomeAttribute('reduced_penetrance')
params.readcount_intervals = getGenomeAttribute('readcount_intervals')
@@ -58,6 +59,7 @@ params.variant_consequences_sv = getGenomeAttribute('variant_consequence
params.vep_filters = getGenomeAttribute('vep_filters')
params.vep_filters_scout_fmt = getGenomeAttribute('vep_filters_scout_fmt')
params.vcf2cytosure_blacklist = getGenomeAttribute('vcf2cytosure_blacklist')
+params.vcfanno_extra_resources = getGenomeAttribute('vcfanno_extra_resources')
params.vcfanno_resources = getGenomeAttribute('vcfanno_resources')
params.vcfanno_toml = getGenomeAttribute('vcfanno_toml')
params.vcfanno_lua = getGenomeAttribute('vcfanno_lua')
diff --git a/modules.json b/modules.json
index 1b731501..cc525b80 100644
--- a/modules.json
+++ b/modules.json
@@ -82,7 +82,7 @@
},
"bwameme/mem": {
"branch": "master",
- "git_sha": "79480293280ff4f10f30bdea1ddd903f223f8489",
+ "git_sha": "0aa157a00b54bcbe2c50be375cafd68d928e7f4d",
"installed_by": ["modules"]
},
"cadd": {
@@ -113,7 +113,7 @@
},
"deepvariant": {
"branch": "master",
- "git_sha": "199ba086a259e1933d6e0ab7596e4a977bbd483a",
+ "git_sha": "a7e8b8afd4fa82f20d745fa778bfdbf39c1f7efb",
"installed_by": ["modules"]
},
"eklipse": {
@@ -123,12 +123,12 @@
},
"ensemblvep/filtervep": {
"branch": "master",
- "git_sha": "214d575774c172062924ad3564b4f66655600730",
+ "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48",
"installed_by": ["modules"]
},
"ensemblvep/vep": {
"branch": "master",
- "git_sha": "76a0696a60c41c57fc5f6040ac31b11ce5d4d8dd",
+ "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48",
"installed_by": ["modules"]
},
"expansionhunter": {
@@ -206,11 +206,6 @@
"git_sha": "cf607b7749da0a8f5ca2a1e31233e13e3159e2fe",
"installed_by": ["modules"]
},
- "gatk4/preprocessintervals": {
- "branch": "master",
- "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980",
- "installed_by": ["modules"]
- },
"gatk4/printreads": {
"branch": "master",
"git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980",
@@ -309,7 +304,7 @@
},
"peddy": {
"branch": "master",
- "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+ "git_sha": "bb8c1e039f2c1d603e8a93665d0386d5c6ac8e5e",
"installed_by": ["modules"]
},
"picard/addorreplacereadgroups": {
diff --git a/modules/local/get_chrom_sizes.nf b/modules/local/get_chrom_sizes.nf
index 4ab80ed1..e84dbe20 100644
--- a/modules/local/get_chrom_sizes.nf
+++ b/modules/local/get_chrom_sizes.nf
@@ -4,8 +4,8 @@ process GET_CHROM_SIZES {
conda "conda-forge::coreutils=8.31"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--0' :
- 'biocontainers/gnu-wget:1.18--0' }"
+ 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' :
+ 'biocontainers/gnu-wget:1.18--h36e9172_9' }"
input:
tuple val(meta), path(fai)
diff --git a/modules/local/rename_align_files.nf b/modules/local/rename_align_files.nf
index da9f890b..40278ca5 100644
--- a/modules/local/rename_align_files.nf
+++ b/modules/local/rename_align_files.nf
@@ -4,8 +4,8 @@ process RENAME_ALIGN_FILES {
conda "conda-forge::coreutils=8.31"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--0' :
- 'biocontainers/gnu-wget:1.18--0' }"
+ 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h36e9172_9' :
+ 'biocontainers/gnu-wget:1.18--h36e9172_9' }"
input:
tuple val(meta), path(input)
diff --git a/modules/nf-core/bwameme/mem/main.nf b/modules/nf-core/bwameme/mem/main.nf
index db41316e..2efc8c0f 100644
--- a/modules/nf-core/bwameme/mem/main.nf
+++ b/modules/nf-core/bwameme/mem/main.nf
@@ -12,6 +12,8 @@ process BWAMEME_MEM {
tuple val(meta2), path(index)
tuple val(meta3), path(fasta)
val sort_bam
+ val mbuffer
+ val samtools_threads
output:
tuple val(meta), path("*.sam") , emit: sam , optional:true
@@ -29,14 +31,20 @@ process BWAMEME_MEM {
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def samtools_command = sort_bam ? 'sort' : 'view'
- def mbuffer_mem = 3072
- if (!task.memory) {
- log.info '[bwameme-mbuffer] Available memory not known - defaulting to 3GB for mbuffer. Specify process memory requirements to change this.'
+ if (!mbuffer) {
+ log.info '[bwameme-mbuffer] Memory for mbuffer is not set - defaulting to 3GB for mbuffer.'
+ mbuffer_mem = 3072
} else {
- mbuffer_mem = (task.memory.mega*0.5).intValue()
+ mbuffer_mem = mbuffer
}
- def mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : ""
- def mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/task.cpus).intValue()+"M" : ""
+ if (!samtools_threads) {
+ log.info 'Number of threads for samtools is not set - defaulting to 2 threads.'
+ threads = 2
+ } else {
+ threads = samtools_threads
+ }
+ mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : ""
+ mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/threads).intValue()+"M" : ""
def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/
def extension_matcher = (args2 =~ extension_pattern)
def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam"
@@ -53,7 +61,7 @@ process BWAMEME_MEM {
\$INDEX \\
$reads \\
$mbuffer_command \\
- | samtools $samtools_command $args2 $mem_per_thread -@ $task.cpus ${reference} -o ${prefix}.${extension} -
+ | samtools $samtools_command $args2 $mem_per_thread -@ $threads ${reference} -o ${prefix}.${extension} -
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/bwameme/mem/meta.yml b/modules/nf-core/bwameme/mem/meta.yml
index c7eb7b28..e5d28db2 100644
--- a/modules/nf-core/bwameme/mem/meta.yml
+++ b/modules/nf-core/bwameme/mem/meta.yml
@@ -52,6 +52,12 @@ input:
type: boolean
description: use samtools sort (true) or samtools view (false)
pattern: "true or false"
+ - mbuffer:
+ type: integer
+ description: memory for mbuffer in megabytes (default 3072)
+ - samtools_threads:
+ type: integer
+ description: number of threads to use during samtools sort (default 2).
output:
- meta:
diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test b/modules/nf-core/bwameme/mem/tests/main.nf.test
index 3b67b39e..8175f58a 100644
--- a/modules/nf-core/bwameme/mem/tests/main.nf.test
+++ b/modules/nf-core/bwameme/mem/tests/main.nf.test
@@ -11,7 +11,7 @@ nextflow_process {
tag "bwameme/index"
config "./nextflow.config"
- test("sarscov2 - fastq, index, fasta, false") {
+ test("sarscov2 - fastq, index, fasta, false, 0, 4") {
setup {
run("BWAMEME_INDEX") {
@@ -38,6 +38,8 @@ nextflow_process {
input[1] = BWAMEME_INDEX.out.index
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = false
+ input[4] = 0
+ input[5] = 4
"""
}
}
@@ -54,7 +56,7 @@ nextflow_process {
}
- test("sarscov2 - fastq, index, fasta, true") {
+ test("sarscov2 - fastq, index, fasta, true, 2048, 4") {
setup {
run("BWAMEME_INDEX") {
@@ -81,6 +83,8 @@ nextflow_process {
input[1] = BWAMEME_INDEX.out.index
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = true
+ input[4] = 2048
+ input[5] = 4
"""
}
}
@@ -97,7 +101,7 @@ nextflow_process {
}
- test("sarscov2 - [fastq1, fastq2], index, fasta, false") {
+ test("sarscov2 - [fastq1, fastq2], index, fasta, false, 0, 4") {
setup {
run("BWAMEME_INDEX") {
@@ -127,6 +131,8 @@ nextflow_process {
input[1] = BWAMEME_INDEX.out.index
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = false
+ input[4] = 0
+ input[5] = 4
"""
}
}
@@ -143,7 +149,7 @@ nextflow_process {
}
- test("sarscov2 - [fastq1, fastq2], index, fasta, true") {
+ test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, ''") {
setup {
run("BWAMEME_INDEX") {
@@ -173,6 +179,8 @@ nextflow_process {
input[1] = BWAMEME_INDEX.out.index
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = true
+ input[4] = 2048
+ input[5] = ""
"""
}
}
@@ -189,7 +197,7 @@ nextflow_process {
}
- test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") {
+ test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4 - stub") {
options "-stub"
@@ -221,6 +229,8 @@ nextflow_process {
input[1] = BWAMEME_INDEX.out.index
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = true
+ input[4] = 2048
+ input[5] = 4
"""
}
}
diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap
index 281011ae..55235959 100644
--- a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap
+++ b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap
@@ -1,4 +1,17 @@
{
+ "sarscov2 - [fastq1, fastq2], index, fasta, false, 0, 4": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-20T16:07:51.065498711"
+ },
"sarscov2 - [fastq1, fastq2], index, fasta, false": {
"content": [
"test.bam",
@@ -12,6 +25,84 @@
},
"timestamp": "2024-05-15T20:04:31.962017214"
},
+ "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-20T16:08:18.378362535"
+ },
+ "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, ''": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-20T16:25:43.613918051"
+ },
+ "sarscov2 - fastq, index, fasta, false": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-15T20:00:05.782384898"
+ },
+ "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048 - stub": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-18T10:17:40.514767321"
+ },
+ "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-18T10:17:31.035368735"
+ },
+ "sarscov2 - fastq, index, fasta, true, 2048, 4": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-20T16:07:24.071789902"
+ },
"sarscov2 - [fastq1, fastq2], index, fasta, true - stub": {
"content": [
"test.bam",
@@ -25,6 +116,32 @@
},
"timestamp": "2024-05-15T19:28:46.895668666"
},
+ "sarscov2 - [fastq1, fastq2], index, fasta, false, 0": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-18T10:17:08.497131484"
+ },
+ "sarscov2 - fastq, index, fasta, false, 0": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-18T10:16:23.45126091"
+ },
"sarscov2 - [fastq1, fastq2], index, fasta, true": {
"content": [
"test.bam",
@@ -38,7 +155,7 @@
},
"timestamp": "2024-05-15T20:44:56.510177191"
},
- "sarscov2 - fastq, index, fasta, false": {
+ "sarscov2 - fastq, index, fasta, true, 2048": {
"content": [
"test.bam",
[
@@ -47,9 +164,9 @@
],
"meta": {
"nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "nextflow": "24.04.2"
},
- "timestamp": "2024-05-15T20:00:05.782384898"
+ "timestamp": "2024-06-18T10:16:46.541148031"
},
"sarscov2 - fastq, index, fasta, true": {
"content": [
@@ -63,5 +180,31 @@
"nextflow": "23.10.1"
},
"timestamp": "2024-05-15T20:44:05.2657749"
+ },
+ "sarscov2 - fastq, index, fasta, false, 0, 4": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-20T16:06:58.802149967"
+ },
+ "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4 - stub": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,700d57071f430eb29b3c67d0a199eb95"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-20T16:08:28.453969552"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/deepvariant/README.md b/modules/nf-core/deepvariant/README.md
index ca112a7d..9d1ceb34 100644
--- a/modules/nf-core/deepvariant/README.md
+++ b/modules/nf-core/deepvariant/README.md
@@ -1,6 +1,8 @@
# Conda is not supported at the moment
-The [bioconda](https://bioconda.github.io/recipes/deepvariant/README.html) recipe is not fully working as expected
+The [bioconda](https://bioconda.github.io/recipes/deepvariant/README.html) recipe is not fully working as expected.
+
+See https://github.com/bioconda/bioconda-recipes/issues/30310 and https://github.com/nf-core/modules/issues/1754 for more information.
Hence, we are using the docker container provided by the authors of the tool:
diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf
index 507b6c11..8d3d0911 100644
--- a/modules/nf-core/deepvariant/main.nf
+++ b/modules/nf-core/deepvariant/main.nf
@@ -2,14 +2,17 @@ process DEEPVARIANT {
tag "$meta.id"
label 'process_high'
- //Conda is not supported at the moment
- container "nf-core/deepvariant:1.5.0"
+ // FIXME Conda is not supported at the moment
+ // BUG https://github.com/nf-core/modules/issues/1754
+ // BUG https://github.com/bioconda/bioconda-recipes/issues/30310
+ container "nf-core/deepvariant:1.6.1"
input:
tuple val(meta), path(input), path(index), path(intervals)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(gzi)
+ tuple val(meta5), path(par_bed)
output:
tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf
@@ -29,6 +32,10 @@ process DEEPVARIANT {
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def regions = intervals ? "--regions=${intervals}" : ""
+ def par_regions = par_bed ? "--par_regions_bed=${par_bed}" : ""
+ // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755
+ // FIXME Revert this on next version bump
+ def VERSION = '1.6.1'
"""
/opt/deepvariant/bin/run_deepvariant \\
@@ -38,12 +45,13 @@ process DEEPVARIANT {
--output_gvcf=${prefix}.g.vcf.gz \\
${args} \\
${regions} \\
- --intermediate_results_dir=. \\
+ ${par_regions} \\
+ --intermediate_results_dir=tmp \\
--num_shards=${task.cpus}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' )
+ deepvariant: $VERSION
END_VERSIONS
"""
@@ -53,6 +61,9 @@ process DEEPVARIANT {
error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead."
}
prefix = task.ext.prefix ?: "${meta.id}"
+ // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755
+ // FIXME Revert this on next version bump
+ def VERSION = '1.6.1'
"""
touch ${prefix}.vcf.gz
touch ${prefix}.vcf.gz.tbi
@@ -61,7 +72,7 @@ process DEEPVARIANT {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' )
+ deepvariant: $VERSION
END_VERSIONS
"""
}
diff --git a/modules/nf-core/deepvariant/meta.yml b/modules/nf-core/deepvariant/meta.yml
index a50dc57d..2327dd5f 100644
--- a/modules/nf-core/deepvariant/meta.yml
+++ b/modules/nf-core/deepvariant/meta.yml
@@ -57,6 +57,15 @@ input:
type: file
description: GZI index of reference fasta file
pattern: "*.gzi"
+ - meta5:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'genome' ]
+ - par_bed:
+ type: file
+ description: BED file containing PAR regions
+ pattern: "*.bed"
output:
- meta:
type: map
diff --git a/modules/nf-core/deepvariant/tests/main.nf.test b/modules/nf-core/deepvariant/tests/main.nf.test
index 91612c1e..17765233 100644
--- a/modules/nf-core/deepvariant/tests/main.nf.test
+++ b/modules/nf-core/deepvariant/tests/main.nf.test
@@ -31,6 +31,9 @@ nextflow_process {
input[3] = [
[],[]
]
+ input[4] = [
+ [],[]
+ ]
"""
}
}
@@ -66,6 +69,48 @@ nextflow_process {
input[3] = [
[],[]
]
+ input[4] = [
+ [],[]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed") {
+ config "./nextflow-non-autosomal-calling.config"
+ tag "test"
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [],[]
+ ]
+ input[4] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ ]
"""
}
}
@@ -102,6 +147,9 @@ nextflow_process {
[ id:'genome'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true)
]
+ input[4] = [
+ [],[]
+ ]
"""
}
}
diff --git a/modules/nf-core/deepvariant/tests/main.nf.test.snap b/modules/nf-core/deepvariant/tests/main.nf.test.snap
index 6ad76ae4..04f87774 100644
--- a/modules/nf-core/deepvariant/tests/main.nf.test.snap
+++ b/modules/nf-core/deepvariant/tests/main.nf.test.snap
@@ -1,269 +1,358 @@
{
- "homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai": {
- "content": [
- {
- "0": [
- [
+ "homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai": {
+ "content": [
{
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ],
+ "gvcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "gvcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "vcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ]
+ }
],
- "1": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "2": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "3": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "4": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ],
- "gvcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "gvcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "vcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
- ],
- "vcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "versions": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ]
- }
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-01T12:09:40.987117305"
},
- "timestamp": "2024-03-20T13:54:42.757335334"
- },
- "homo_sapiens - [bam, bai] - fasta - fai": {
- "content": [
- {
- "0": [
- [
+ "homo_sapiens - [bam, bai] - fasta - fai": {
+ "content": [
{
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ],
+ "gvcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "gvcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "vcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ]
+ }
],
- "1": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "2": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "3": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "4": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ],
- "gvcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "gvcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "vcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
- ],
- "vcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "versions": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ]
- }
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-01T12:08:47.058887374"
},
- "timestamp": "2024-03-20T13:54:18.409489045"
- },
- "homo_sapiens - [cram, crai, genome_bed] - fasta - fai": {
- "content": [
- {
- "0": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
- ],
- "1": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "2": [
- [
+ "homo_sapiens - [cram, crai, genome_bed] - fasta - fai": {
+ "content": [
{
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ],
+ "gvcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "gvcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "vcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ]
+ }
],
- "3": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "4": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ],
- "gvcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "gvcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "vcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
- ],
- "vcf_tbi": [
- [
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-01T12:09:13.952808655"
+ },
+ "homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed": {
+ "content": [
{
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ],
+ "gvcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a"
+ ]
+ ],
+ "gvcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c"
+ ]
+ ],
+ "vcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ]
+ }
],
- "versions": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ]
- }
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-20T13:54:30.523871801"
- }
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-07-23T14:29:24.939680679"
+ }
}
diff --git a/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config
new file mode 100644
index 00000000..4be8986b
--- /dev/null
+++ b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config
@@ -0,0 +1,8 @@
+process {
+
+ withName: DEEPVARIANT {
+ ext.args = '--model_type=WGS --haploid_contigs chr22'
+ ext.prefix = { "${meta.id}_out" }
+ }
+
+}
diff --git a/modules/nf-core/ensemblvep/filtervep/environment.yml b/modules/nf-core/ensemblvep/filtervep/environment.yml
index d84dc89e..283a45bb 100644
--- a/modules/nf-core/ensemblvep/filtervep/environment.yml
+++ b/modules/nf-core/ensemblvep/filtervep/environment.yml
@@ -1,7 +1,5 @@
-name: ensemblvep_filtervep
channels:
- conda-forge
- bioconda
- - defaults
dependencies:
- - bioconda::ensembl-vep=110.0
+ - bioconda::ensembl-vep=112.0
diff --git a/modules/nf-core/ensemblvep/filtervep/main.nf b/modules/nf-core/ensemblvep/filtervep/main.nf
index 53abf772..a56bdb83 100644
--- a/modules/nf-core/ensemblvep/filtervep/main.nf
+++ b/modules/nf-core/ensemblvep/filtervep/main.nf
@@ -4,8 +4,8 @@ process ENSEMBLVEP_FILTERVEP {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' :
- 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }"
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:112.0--pl5321h2a3209d_0' :
+ 'biocontainers/ensembl-vep:112.0--pl5321h2a3209d_0' }"
input:
tuple val(meta), path(input)
@@ -47,4 +47,3 @@ process ENSEMBLVEP_FILTERVEP {
END_VERSIONS
"""
}
-
diff --git a/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test
new file mode 100644
index 00000000..1852e2ab
--- /dev/null
+++ b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test
@@ -0,0 +1,136 @@
+nextflow_process {
+
+ name "Test Process ENSEMBLVEP_FILTERVEP"
+ script "../main.nf"
+ process "ENSEMBLVEP_FILTERVEP"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "ensemblvep"
+ tag "ensemblvep/vep"
+ tag "ensemblvep/filtervep"
+ tag "ensemblvep/download"
+
+ // Test for filtering VCF file
+ test("test_ensemblvep_filtervep_vcf") {
+ config "./vcf.config"
+
+ setup {
+ run("ENSEMBLVEP_DOWNLOAD") {
+ script "../../download/main.nf"
+
+ process {
+ """
+ input[0] = Channel.of([
+ [id:"112_WBcel235"],
+ params.vep_genome,
+ params.vep_species,
+ params.vep_cache_version
+ ])
+ """
+ }
+ }
+ run("ENSEMBLVEP_VEP") {
+ script "../../vep/main.nf"
+
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
+ []
+ ])
+ input[1] = params.vep_genome
+ input[2] = params.vep_species
+ input[3] = params.vep_cache_version
+ input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] }
+ input[5] = Channel.value([
+ [id:"fasta"],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[6] = []
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = ENSEMBLVEP_VEP.out.vcf
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match() },
+ { assert path(process.out.output.get(0).get(1)).readLines().first().contains("##fileformat=VCFv4.2") }
+ )
+ }
+ }
+
+ // Test for filtering TAB file
+ test("test_ensemblvep_filtervep_tab_gz") {
+ config "./tab.gz.config"
+
+ setup {
+ run("ENSEMBLVEP_DOWNLOAD") {
+ script "../../download/main.nf"
+
+ process {
+ """
+ input[0] = Channel.of([
+ [id:"112_WBcel235"],
+ params.vep_genome,
+ params.vep_species,
+ params.vep_cache_version
+ ])
+ """
+ }
+ }
+ run("ENSEMBLVEP_VEP") {
+ script "../../vep/main.nf"
+
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
+ []
+ ])
+ input[1] = params.vep_genome
+ input[2] = params.vep_species
+ input[3] = params.vep_cache_version
+ input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] }
+ input[5] = Channel.value([
+ [id:"fasta"],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[6] = []
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = ENSEMBLVEP_VEP.out.tab
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match() },
+ { assert path(process.out.output.get(0).get(1)).readLines().first().contains("## ENSEMBL VARIANT EFFECT PREDICTOR v112.0") }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap
new file mode 100644
index 00000000..ddaa1dc1
--- /dev/null
+++ b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap
@@ -0,0 +1,26 @@
+{
+ "test_ensemblvep_filtervep_vcf": {
+ "content": [
+ [
+ "versions.yml:md5,4d3217834548bbe6784e102e9348461d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-02T13:38:25.18143"
+ },
+ "test_ensemblvep_filtervep_tab_gz": {
+ "content": [
+ [
+ "versions.yml:md5,4d3217834548bbe6784e102e9348461d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-02T13:57:11.471669"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/ensemblvep/filtervep/tests/nextflow.config b/modules/nf-core/ensemblvep/filtervep/tests/nextflow.config
new file mode 100644
index 00000000..aee2e62b
--- /dev/null
+++ b/modules/nf-core/ensemblvep/filtervep/tests/nextflow.config
@@ -0,0 +1,10 @@
+/*
+========================================================================================
+ Nextflow config file for running tests
+========================================================================================
+*/
+params {
+ vep_cache_version = "112"
+ vep_genome = "WBcel235"
+ vep_species = "caenorhabditis_elegans"
+}
diff --git a/modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config b/modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config
new file mode 100644
index 00000000..0aa5ea75
--- /dev/null
+++ b/modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config
@@ -0,0 +1,24 @@
+/*
+========================================================================================
+ Nextflow config file for running tests
+========================================================================================
+*/
+
+process {
+
+ publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+ withName: ENSEMBLVEP_DOWNLOAD {
+ ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE'
+ }
+
+ withName: ENSEMBLVEP_VEP {
+ ext.args = '--tab'
+ ext.prefix = { "${meta.id}_vep" }
+ }
+
+ withName: ENSEMBLVEP_FILTERVEP {
+ ext.args = '--filter "Feature_type is Transcript"'
+ ext.suffix = "tab"
+ }
+}
diff --git a/modules/nf-core/ensemblvep/filtervep/tests/tags.yml b/modules/nf-core/ensemblvep/filtervep/tests/tags.yml
new file mode 100644
index 00000000..b43bf40d
--- /dev/null
+++ b/modules/nf-core/ensemblvep/filtervep/tests/tags.yml
@@ -0,0 +1,2 @@
+ensemblvep/filtervep:
+ - "modules/nf-core/ensemblvep/filtervep/**"
diff --git a/modules/nf-core/ensemblvep/filtervep/tests/vcf.config b/modules/nf-core/ensemblvep/filtervep/tests/vcf.config
new file mode 100644
index 00000000..0b65fe5c
--- /dev/null
+++ b/modules/nf-core/ensemblvep/filtervep/tests/vcf.config
@@ -0,0 +1,23 @@
+/*
+========================================================================================
+ Nextflow config file for running tests
+========================================================================================
+*/
+
+process {
+
+ publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+ withName: ENSEMBLVEP_DOWNLOAD {
+ ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE'
+ }
+
+ withName: ENSEMBLVEP_VEP {
+ ext.args = '--vcf'
+ ext.prefix = { "${meta.id}_vep" }
+ }
+
+ withName: ENSEMBLVEP_FILTERVEP {
+ ext.args = '--filter "Feature_type is Transcript"'
+ }
+}
diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml
index 7a127746..283a45bb 100644
--- a/modules/nf-core/ensemblvep/vep/environment.yml
+++ b/modules/nf-core/ensemblvep/vep/environment.yml
@@ -1,7 +1,5 @@
-name: ensemblvep_vep
channels:
- conda-forge
- bioconda
- - defaults
dependencies:
- - bioconda::ensembl-vep=110.0
+ - bioconda::ensembl-vep=112.0
diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf
index a7fc5ad1..5f33265b 100644
--- a/modules/nf-core/ensemblvep/vep/main.nf
+++ b/modules/nf-core/ensemblvep/vep/main.nf
@@ -4,8 +4,8 @@ process ENSEMBLVEP_VEP {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' :
- 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }"
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:112.0--pl5321h2a3209d_0' :
+ 'biocontainers/ensembl-vep:112.0--pl5321h2a3209d_0' }"
input:
tuple val(meta), path(vcf), path(custom_extra_files)
@@ -20,7 +20,7 @@ process ENSEMBLVEP_VEP {
tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf
tuple val(meta), path("*.tab.gz") , optional:true, emit: tab
tuple val(meta), path("*.json.gz") , optional:true, emit: json
- path "*.summary.html" , optional:true, emit: report
+ path "*.html" , optional:true, emit: report
path "versions.yml" , emit: versions
when:
@@ -57,10 +57,10 @@ process ENSEMBLVEP_VEP {
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
- touch ${prefix}.vcf.gz
- touch ${prefix}.tab.gz
- touch ${prefix}.json.gz
- touch ${prefix}.summary.html
+ echo "" | gzip > ${prefix}.vcf.gz
+ echo "" | gzip > ${prefix}.tab.gz
+ echo "" | gzip > ${prefix}.json.gz
+ touch ${prefix}_summary.html
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test
index f072dcab..e68fff3c 100644
--- a/modules/nf-core/ensemblvep/vep/tests/main.nf.test
+++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test
@@ -1,26 +1,31 @@
nextflow_process {
name "Test Process ENSEMBLVEP_VEP"
- script "modules/nf-core/ensemblvep/vep/main.nf"
+ script "../main.nf"
process "ENSEMBLVEP_VEP"
config "./nextflow.config"
+
tag "modules"
tag "modules_nfcore"
tag "ensemblvep"
tag "ensemblvep/vep"
tag "ensemblvep/download"
-
test("test_ensemblvep_vep_fasta_vcf") {
-
config "./vcf.config"
setup {
run("ENSEMBLVEP_DOWNLOAD") {
script "../../download/main.nf"
+
process {
"""
- input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version])
+ input[0] = Channel.of([
+ [id:"112_WBcel235"],
+ params.vep_genome,
+ params.vep_species,
+ params.vep_cache_version
+ ])
"""
}
}
@@ -31,7 +36,7 @@ nextflow_process {
"""
input[0] = Channel.of([
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[]
])
input[1] = params.vep_genome
@@ -40,7 +45,7 @@ nextflow_process {
input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] }
input[5] = Channel.value([
[id:"fasta"],
- file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
])
input[6] = []
"""
@@ -49,23 +54,29 @@ nextflow_process {
then {
assertAll(
- {assert process.success},
- {assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")}
+ { assert process.success },
+ { assert snapshot(process.out.versions).match() },
+ { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") }
)
}
}
test("test_ensemblvep_vep_fasta_tab_gz") {
-
config "./tab.gz.config"
setup {
run("ENSEMBLVEP_DOWNLOAD") {
script "../../download/main.nf"
+
process {
"""
- input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version])
+ input[0] = Channel.of([
+ [id:"112_WBcel235"],
+ params.vep_genome,
+ params.vep_species,
+ params.vep_cache_version
+ ])
"""
}
}
@@ -76,7 +87,7 @@ nextflow_process {
"""
input[0] = Channel.of([
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[]
])
input[1] = params.vep_genome
@@ -85,7 +96,7 @@ nextflow_process {
input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] }
input[5] = Channel.value([
[id:"fasta"],
- file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
])
input[6] = []
"""
@@ -94,8 +105,9 @@ nextflow_process {
then {
assertAll(
- {assert process.success},
- {assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v110.0")}
+ { assert process.success },
+ { assert snapshot(process.out.versions).match() },
+ { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v112.0") }
)
}
}
diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap
new file mode 100644
index 00000000..1c4c0e4e
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap
@@ -0,0 +1,26 @@
+{
+ "test_ensemblvep_vep_fasta_tab_gz": {
+ "content": [
+ [
+ "versions.yml:md5,d06f1eb60f534489026d682eb3aa5559"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-02T10:15:18.228927"
+ },
+ "test_ensemblvep_vep_fasta_vcf": {
+ "content": [
+ [
+ "versions.yml:md5,d06f1eb60f534489026d682eb3aa5559"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.4"
+ },
+ "timestamp": "2024-09-02T10:14:50.193861"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config
index cfaef733..9aa48164 100644
--- a/modules/nf-core/ensemblvep/vep/tests/nextflow.config
+++ b/modules/nf-core/ensemblvep/vep/tests/nextflow.config
@@ -1,13 +1,12 @@
params {
- vep_cache_version = "110"
- vep_genome = "WBcel235"
- vep_species = "caenorhabditis_elegans"
+ vep_cache_version = "112"
+ vep_genome = "WBcel235"
+ vep_species = "caenorhabditis_elegans"
}
process {
-
withName: ENSEMBLVEP_DOWNLOAD {
- ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE'
+ ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE'
+ ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" }
}
-
}
diff --git a/modules/nf-core/gatk4/preprocessintervals/environment.yml b/modules/nf-core/gatk4/preprocessintervals/environment.yml
deleted file mode 100644
index ec0b09e9..00000000
--- a/modules/nf-core/gatk4/preprocessintervals/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: gatk4_preprocessintervals
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - bioconda::gatk4=4.5.0.0
diff --git a/modules/nf-core/gatk4/preprocessintervals/main.nf b/modules/nf-core/gatk4/preprocessintervals/main.nf
deleted file mode 100644
index dffc4bb1..00000000
--- a/modules/nf-core/gatk4/preprocessintervals/main.nf
+++ /dev/null
@@ -1,62 +0,0 @@
-process GATK4_PREPROCESSINTERVALS {
- tag "$fasta"
- label 'process_medium'
-
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0':
- 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }"
-
- input:
- tuple val(meta), path(fasta)
- tuple val(meta2), path(fai)
- tuple val(meta3), path(dict)
- tuple val(meta4), path(intervals)
- tuple val(meta5), path(exclude_intervals)
-
- output:
- tuple val(meta), path("*.interval_list"), emit: interval_list
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- def include_command = intervals ? "--intervals $intervals" : ""
- def exclude_command = exclude_intervals ? "--exclude-intervals $exclude_intervals" : ""
-
- def avail_mem = 3072
- if (!task.memory) {
- log.info '[GATK PreprocessIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
- } else {
- avail_mem = (task.memory.mega*0.8).intValue()
- }
-
- """
- gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\
- PreprocessIntervals \\
- $include_command \\
- $exclude_command \\
- --reference $fasta \\
- --output ${prefix}.interval_list \\
- $args
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
- END_VERSIONS
- """
-
- stub:
- def prefix = task.ext.prefix ?: "${meta.id}"
- """
- touch ${prefix}.interval_list
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
- END_VERSIONS
- """
-}
diff --git a/modules/nf-core/gatk4/preprocessintervals/meta.yml b/modules/nf-core/gatk4/preprocessintervals/meta.yml
deleted file mode 100644
index cf3f6ac4..00000000
--- a/modules/nf-core/gatk4/preprocessintervals/meta.yml
+++ /dev/null
@@ -1,82 +0,0 @@
-name: "gatk4_preprocessintervals"
-description: Prepares bins for coverage collection.
-keywords:
- - bed
- - gatk4
- - interval
- - preprocessintervals
-tools:
- - "gatk4":
- description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size.
- homepage: https://gatk.broadinstitute.org/hc/en-us
- documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
- doi: "10.1158/1538-7445.AM2017-3590"
- licence: ["Apache-2.0"]
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing reference information
- e.g. [ id:'test' ]
- - fasta:
- type: file
- description: The reference fasta file
- pattern: "*.fasta"
- - meta2:
- type: map
- description: |
- Groovy Map containing reference information
- e.g. [ id:'test' ]
- - fai:
- type: file
- description: Index of reference fasta file
- pattern: "*.fasta.fai"
- - meta3:
- type: map
- description: |
- Groovy Map containing reference information
- e.g. [ id:'test' ]
- - dict:
- type: file
- description: GATK sequence dictionary
- pattern: "*.dict"
- - meta4:
- type: map
- description: |
- Groovy Map containing reference information
- e.g. [ id:'test' ]
- - intervals:
- type: file
- description: Interval file (bed or interval_list) with the genomic regions to be included from the analysis (optional)
- pattern: "*.{bed,interval_list}"
- - meta5:
- type: map
- description: |
- Groovy Map containing reference information
- e.g. [ id:'test' ]
- - exclude_intervals:
- type: file
- description: Interval file (bed or interval_list) with the genomic regions to be excluded from the analysis (optional)
- pattern: "*.{bed,interval_list}"
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing reference information
- e.g. [ id:'test' ]
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
- - interval_list:
- type: file
- description: Processed interval list file
- pattern: "*.{bed,interval_list}"
-authors:
- - "@ryanjameskennedy"
- - "@ViktorHy"
- - "@ramprasadn"
-maintainers:
- - "@ryanjameskennedy"
- - "@ViktorHy"
- - "@ramprasadn"
diff --git a/modules/nf-core/peddy/main.nf b/modules/nf-core/peddy/main.nf
index b6be28c6..0e533ec3 100644
--- a/modules/nf-core/peddy/main.nf
+++ b/modules/nf-core/peddy/main.nf
@@ -35,7 +35,7 @@ process PEDDY {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- peddy: \$( peddy --version 2>&1 | sed 's/peddy, version //' )
+ peddy: \$( peddy --version 2>&1 | tail -1 | sed 's/peddy, version //' )
END_VERSIONS
"""
@@ -49,6 +49,9 @@ process PEDDY {
touch ${prefix}.peddy.ped
touch ${prefix}.html
- touch versions.yml
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ peddy: \$( peddy --version 2>&1 | tail -1 | sed 's/peddy, version //' )
+ END_VERSIONS
"""
}
diff --git a/modules/nf-core/peddy/tests/main.nf.test b/modules/nf-core/peddy/tests/main.nf.test
new file mode 100644
index 00000000..892da6cf
--- /dev/null
+++ b/modules/nf-core/peddy/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process {
+
+    name "Test Process PEDDY"
+    script "../main.nf" // relative to this test file, same convention as the other module tests
+    process "PEDDY"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "peddy"
+
+    test("test - peddy - stub") {
+
+        options "-stub" // execute the process's stub: section instead of its script:
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ],
+                    file(params.test_data['homo_sapiens']['genome']['justhusky_minimal_vcf_gz'], checkIfExists: true),
+                    file(params.test_data['homo_sapiens']['genome']['justhusky_minimal_vcf_gz_tbi'], checkIfExists: true)
+                ]
+                input[1] = file(params.test_data['homo_sapiens']['genome']['justhusky_ped'], checkIfExists: true)
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(process.out).match() // compares every output channel against main.nf.test.snap
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/peddy/tests/main.nf.test.snap b/modules/nf-core/peddy/tests/main.nf.test.snap
new file mode 100644
index 00000000..5de593f1
--- /dev/null
+++ b/modules/nf-core/peddy/tests/main.nf.test.snap
@@ -0,0 +1,93 @@
+{
+ "test - peddy - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "test.vs.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.het_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "test.ped_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "test.sex_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.peddy.ped:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+
+ ],
+ "4": [
+ "versions.yml:md5,d3587e67aded68bcf24c47542efe012f"
+ ],
+ "csv": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.het_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "test.ped_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "test.sex_check.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "test.vs.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "ped": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.peddy.ped:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "png": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,d3587e67aded68bcf24c47542efe012f"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-25T09:28:05.418978589"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/peddy/tests/tags.yml b/modules/nf-core/peddy/tests/tags.yml
new file mode 100644
index 00000000..d8324a74
--- /dev/null
+++ b/modules/nf-core/peddy/tests/tags.yml
@@ -0,0 +1,2 @@
+peddy: # must match the tag "peddy" declared in tests/main.nf.test
+  - "modules/nf-core/peddy/**"
diff --git a/nextflow.config b/nextflow.config
index 928d3a8c..b6a62bee 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -27,21 +27,21 @@ params {
run_mt_for_wes = false
run_rtgvcfeval = false
save_mapped_as_cram = false
- skip_eklipse = false
+ scatter_count = 20
skip_fastp = false
- skip_fastqc = false
skip_gens = true
skip_germlinecnvcaller = false
- skip_haplocheck = false
skip_peddy = false
skip_me_calling = false
skip_me_annotation = false
skip_mt_annotation = false
- skip_qualimap = false
+ skip_repeat_annotation = false
+ skip_repeat_calling = false
+ skip_smncopynumbercaller = false
skip_snv_annotation = false
+ skip_snv_calling = false
skip_sv_annotation = false
- skip_me_annotation = false
- skip_mt_annotation = false
+ skip_sv_calling = false
skip_mt_subsample = false
skip_vcf2cytosure = true
skip_vep_filter = false
@@ -53,6 +53,9 @@ params {
// Alignment
aligner = 'bwamem2'
+ mt_aligner = 'bwamem2'
+ mbuffer_mem = 3072
+ samtools_sort_threads = 4
min_trimmed_length = 40
mt_subsample_rd = 150
mt_subsample_seed = 30
@@ -304,7 +307,7 @@ manifest {
description = """call and score variants from WGS/WES of rare disease patients"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '2.1.0'
+ version = '2.2.0'
doi = ''
}
@@ -316,6 +319,7 @@ includeConfig 'conf/modules/annotate_consequence_pli.config'
includeConfig 'conf/modules/annotate_genome_snvs.config'
includeConfig 'conf/modules/annotate_mt_snvs.config'
includeConfig 'conf/modules/annotate_structural_variants.config'
+includeConfig 'conf/modules/annotate_repeat_expansions.config'
includeConfig 'conf/modules/call_repeat_expansions.config'
includeConfig 'conf/modules/call_snv.config'
includeConfig 'conf/modules/call_structural_variants.config'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 80cdfd11..3ccdc1f6 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -267,6 +267,14 @@
"description": "Path to mitochondrial FASTA genome file.",
"fa_icon": "fas fa-file"
},
+ "par_bed": {
+ "type": "string",
+ "exists": true,
+ "format": "path",
+ "fa_icon": "fas fa-file",
+ "pattern": "^\\S+\\.bed(\\.gz)?$",
+ "description": "Path to a BED file containing PAR regions (used by deepvariant)."
+ },
"ploidy_model": {
"type": "string",
"exists": true,
@@ -281,7 +289,7 @@
"fa_icon": "fas fa-file",
"description": "Interval list file containing the intervals over which read counts are tabulated for CNV calling",
"format": "file-path",
- "help_text": "Generated by GATK4 preprocessintervals. If absent, pipeline can generate this file."
+ "help_text": "Generated by GATK4 preprocessintervals. It needs to be the same as the intervals used to generate the ploidy and cnv models."
},
"reduced_penetrance": {
"type": "string",
@@ -402,6 +410,14 @@
"fa_icon": "fas fa-file",
"description": "Path to vcf2cytosure blacklist file"
},
+ "vcfanno_extra_resources": {
+ "type": "string",
+ "exists": true,
+ "format": "file-path",
+ "description": "Path to a VCF file containing annotations.",
+ "help_text": "Can be used to supply case-specific annotations in addition to those provided using --vcfanno_resources",
+ "fa_icon": "fas fa-file"
+ },
"vcfanno_resources": {
"type": "string",
"exists": true,
@@ -469,9 +485,9 @@
"analysis_type": {
"type": "string",
"default": "wgs",
- "description": "Specifies which analysis type for the pipeline- either 'wgs','wes','mito'. This changes resources consumed and tools used.",
+ "description": "Specifies the analysis type for the pipeline: either 'wgs' or 'wes'. This changes the resources consumed and the tools used.",
"fa_icon": "fas fa-align-center",
- "enum": ["wgs", "wes", "mito"]
+ "enum": ["wgs", "wes"]
},
"bwa_as_fallback": {
"type": "boolean",
@@ -482,9 +498,8 @@
"platform": {
"type": "string",
"default": "illumina",
- "description": "Specifies which analysis type for the pipeline- either 'wgs','wes','mito'. This changes resources consumed and tools used.",
- "fa_icon": "fas fa-align-center",
- "enum": ["illumina"]
+ "description": "Specifies the platform on which the reads were sequenced.",
+ "fa_icon": "fas fa-align-center"
},
"ngsbits_samplegender_method": {
"type": "string",
@@ -508,21 +523,17 @@
"description": "Specifies whether to generate and publish alignment files as cram instead of bam",
"fa_icon": "fas fa-toggle-on"
},
- "skip_fastqc": {
- "type": "boolean",
- "description": "Specifies whether or not to skip FASTQC.",
- "fa_icon": "fas fa-toggle-on"
+ "scatter_count": {
+ "type": "integer",
+ "default": 20,
+ "description": "Number of intervals to split your genome into (used to parallelize annotations)",
+ "fa_icon": "fas fa-less-than"
},
"skip_fastp": {
"type": "boolean",
"description": "Specifies whether or not to skip trimming with fastp.",
"fa_icon": "fas fa-toggle-on"
},
- "skip_haplocheck": {
- "type": "boolean",
- "description": "Specifies whether or not to skip haplocheck.",
- "fa_icon": "fas fa-toggle-on"
- },
"skip_gens": {
"type": "boolean",
"description": "Specifies whether or not to skip gens preprocessing subworkflow.",
@@ -533,21 +544,11 @@
"description": "Specifies whether or not to skip CNV calling using GATK's GermlineCNVCaller",
"fa_icon": "fas fa-toggle-on"
},
- "skip_eklipse": {
- "type": "boolean",
- "description": "Specifies whether or not to skip eKLIPse.",
- "fa_icon": "fas fa-toggle-on"
- },
"skip_peddy": {
"type": "boolean",
"description": "Specifies whether or not to skip peddy.",
"fa_icon": "fas fa-toggle-on"
},
- "skip_qualimap": {
- "type": "boolean",
- "description": "Specifies whether or not to skip Qualimap.",
- "fa_icon": "fas fa-toggle-on"
- },
"skip_me_calling": {
"type": "boolean",
"description": "Specifies whether or not to skip calling mobile elements, and the subsequent annotation step.",
@@ -568,16 +569,41 @@
"description": "Specifies whether or not to subsample mt alignment.",
"fa_icon": "fas fa-toggle-on"
},
+ "skip_repeat_annotation": {
+ "type": "boolean",
+ "description": "Specifies whether or not to skip annotation of repeat expansions.",
+ "fa_icon": "fas fa-toggle-on"
+ },
+ "skip_repeat_calling": {
+ "type": "boolean",
+ "description": "Specifies whether or not to skip calling of repeat expansions.",
+ "fa_icon": "fas fa-toggle-on"
+ },
+ "skip_smncopynumbercaller": {
+ "type": "boolean",
+ "description": "Specifies whether or not to skip smncopynumbercaller.",
+ "fa_icon": "fas fa-toggle-on"
+ },
"skip_snv_annotation": {
"type": "boolean",
"description": "Specifies whether or not to skip annotate SNV subworkflow.",
"fa_icon": "fas fa-toggle-on"
},
+ "skip_snv_calling": {
+ "type": "boolean",
+ "description": "Specifies whether or not to skip nuclear and mitochondrial SNV calling and annotation.",
+ "fa_icon": "fas fa-toggle-on"
+ },
"skip_sv_annotation": {
"type": "boolean",
"description": "Specifies whether or not to skip annotate structural variant subworkflow.",
"fa_icon": "fas fa-toggle-on"
},
+ "skip_sv_calling": {
+ "type": "boolean",
+ "description": "Specifies whether or not to skip nuclear and mitochondrial SV calling and annotation.",
+ "fa_icon": "fas fa-toggle-on"
+ },
"skip_vcf2cytosure": {
"type": "boolean",
"default": true,
@@ -601,10 +627,31 @@
"aligner": {
"type": "string",
"default": "bwamem2",
- "description": "Specifies the alignment algorithm to use - available options are 'bwamem2' and 'sentieon'.",
+ "description": "Specifies the alignment algorithm to use - available options are 'bwamem2', 'bwa', 'bwameme' and 'sentieon'.",
"fa_icon": "fas fa-align-center",
"enum": ["bwa", "bwamem2", "bwameme", "sentieon"]
},
+ "mt_aligner": {
+ "type": "string",
+ "default": "bwamem2",
+ "description": "Specifies the alignment algorithm to use for aligning reads to the mitochondrial genome - available options are 'bwamem2', 'bwa' and 'sentieon'.",
+ "fa_icon": "fas fa-align-center",
+ "enum": ["bwa", "bwamem2", "sentieon"]
+ },
+ "samtools_sort_threads": {
+ "type": "integer",
+ "default": 4,
+ "description": "Number of threads allocated for sorting alignment files (used only by bwameme)",
+ "help_text": "For more information about this parameter, see the [bwameme](https://github.com/kaist-ina/BWA-MEME?tab=readme-ov-file#building-pipeline-with-samtools) documentation.",
+ "fa_icon": "fas fa-less-than"
+ },
+ "mbuffer_mem": {
+ "type": "integer",
+ "default": 3072,
+ "description": "Memory allocated for mbuffer in megabytes (used only by bwameme)",
+ "help_text": "For more information about this parameter, see the [bwameme](https://github.com/kaist-ina/BWA-MEME?tab=readme-ov-file#building-pipeline-with-samtools) documentation.",
+ "fa_icon": "fas fa-less-than"
+ },
"min_trimmed_length": {
"type": "integer",
"default": 40,
diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf
index 8822d1cb..b332c84c 100644
--- a/subworkflows/local/align.nf
+++ b/subworkflows/local/align.nf
@@ -21,11 +21,12 @@ workflow ALIGN {
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
- ch_mtshift_bwamemeindex // channel: [mandatory] [ val(meta), path(index) ]
ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ val_mbuffer_mem // integer: [mandatory] memory in megabytes
val_platform // string: [mandatory] illumina or a different technology
+ val_sort_threads // integer: [mandatory] number of sorting threads
main:
ch_bwamem2_bam = Channel.empty()
@@ -56,7 +57,9 @@ workflow ALIGN {
ch_genome_bwamemeindex,
ch_genome_fasta,
ch_genome_fai,
- val_platform
+ val_mbuffer_mem,
+ val_platform,
+ val_sort_threads
)
ch_bwamem2_bam = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bam
ch_bwamem2_bai = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bai
@@ -93,7 +96,6 @@ workflow ALIGN {
CONVERT_MT_BAM_TO_FASTQ.out.bam,
ch_genome_bwaindex,
ch_genome_bwamem2index,
- ch_genome_bwamemeindex,
ch_genome_fasta,
ch_genome_dictionary,
ch_genome_fai
@@ -104,7 +106,6 @@ workflow ALIGN {
CONVERT_MT_BAM_TO_FASTQ.out.bam,
ch_mtshift_bwaindex,
ch_mtshift_bwamem2index,
- ch_mtshift_bwamemeindex,
ch_mtshift_fasta,
ch_mtshift_dictionary,
ch_mtshift_fai
diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf
index d2a9581d..7a0c1d2c 100644
--- a/subworkflows/local/alignment/align_MT.nf
+++ b/subworkflows/local/alignment/align_MT.nf
@@ -5,7 +5,6 @@
include { BWA_MEM as BWA_MEM_MT } from '../../../modules/nf-core/bwa/mem/main'
include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main'
include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main'
-include { BWAMEME_MEM as BWAMEME_MEM_MT } from '../../../modules/nf-core/bwameme/mem/main'
include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main'
include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main'
include { PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main'
@@ -14,34 +13,29 @@ include { SAMTOOLS_SORT as SAMTOOLS_SORT_MT } fr
workflow ALIGN_MT {
take:
- ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ]
- ch_ubam // channel: [mandatory] [ val(meta), path(bam) ]
- ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ]
- ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ]
- ch_bwamemeindex // channel: [mandatory for bwameme] [ val(meta), path(index) ]
- ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_dict // channel: [mandatory] [ val(meta), path(dict) ]
- ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ]
+ ch_ubam // channel: [mandatory] [ val(meta), path(bam) ]
+ ch_bwaindex // channel: [mandatory for bwa and sentieon] [ val(meta), path(index) ]
+ ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ]
+ ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
main:
ch_versions = Channel.empty()
- if (params.aligner.equals("bwamem2")) {
+ if (params.mt_aligner.equals("bwamem2")) {
BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, ch_fasta, true)
ch_align = BWAMEM2_MEM_MT.out.bam
ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first())
- } else if (params.aligner.equals("sentieon")) {
+ } else if (params.mt_aligner.equals("sentieon")) {
SENTIEON_BWAMEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, ch_fai )
ch_align = SENTIEON_BWAMEM_MT.out.bam_and_bai.map{ meta, bam, bai -> [meta, bam] }
ch_versions = ch_versions.mix(SENTIEON_BWAMEM_MT.out.versions.first())
- } else if (params.aligner.equals("bwa")) {
+ } else if (params.mt_aligner.equals("bwa")) {
BWA_MEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, true )
ch_align = BWA_MEM_MT.out.bam
ch_versions = ch_versions.mix(BWA_MEM_MT.out.versions.first())
- } else if (params.aligner.equals("bwameme")) {
- BWAMEME_MEM_MT (ch_fastq, ch_bwamemeindex, ch_fasta, true)
- ch_align = BWAMEME_MEM_MT.out.bam
- ch_versions = ch_versions.mix(BWAMEME_MEM_MT.out.versions.first())
}
ch_align
.join(ch_ubam, failOnMismatch:true, failOnDuplicate:true)
diff --git a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf
index 7d635d51..15d3db9a 100644
--- a/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf
+++ b/subworkflows/local/alignment/align_bwa_bwamem2_bwameme.nf
@@ -21,8 +21,9 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME {
ch_bwameme_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ]
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ val_mbuffer_mem // integer: [mandatory] default: 3072
val_platform // string: [mandatory] default: illumina
-
+ val_sort_threads // integer: [mandatory] default: 4
main:
ch_versions = Channel.empty()
@@ -32,7 +33,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME {
ch_align = BWA.out.bam
ch_versions = ch_versions.mix(BWA.out.versions.first())
} else if (params.aligner.equals("bwameme")) {
- BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, ch_genome_fasta, true )
+ BWAMEME_MEM ( ch_reads_input, ch_bwameme_index, ch_genome_fasta, true, val_mbuffer_mem, val_sort_threads )
ch_align = BWAMEME_MEM.out.bam
ch_versions = ch_versions.mix(BWAMEME_MEM.out.versions.first())
} else {
diff --git a/subworkflows/local/annotate_genome_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf
index e2f10a1e..396e2614 100644
--- a/subworkflows/local/annotate_genome_snvs.nf
+++ b/subworkflows/local/annotate_genome_snvs.nf
@@ -28,7 +28,8 @@ workflow ANNOTATE_GENOME_SNVS {
analysis_type // string: [mandatory] 'wgs' or 'wes'
ch_cadd_header // channel: [mandatory] [ path(txt) ]
ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
- ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
+ ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index)] ]
+ ch_vcfanno_resources // channel: [mandatory] [ [path(vcf1),path(index1),...,path(vcfn),path(indexn)] ]
ch_vcfanno_lua // channel: [mandatory] [ path(lua) ]
ch_vcfanno_toml // channel: [mandatory] [ path(toml) ]
val_vep_genome // string: [mandatory] GRCh37 or GRCh38
@@ -53,72 +54,57 @@ workflow ANNOTATE_GENOME_SNVS {
ZIP_TABIX_ROHCALL (RHOCALL_ANNOTATE.out.vcf)
- ZIP_TABIX_ROHCALL.out.gz_tbi
- .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]}
- .set { ch_vcf_in }
+ ch_vcf
+ .join(ZIP_TABIX_ROHCALL.out.gz_tbi, remainder: true)
+ .combine(ch_split_intervals)
+ .map { it ->
+ if (it[3].equals(null)) {
+ return [it[0] + [prefix: it[0].id, scatterid:it[4].baseName], it[1], it[2], it[4]]
+ } else {
+ return [it[0] + [prefix: it[0].id + "_rhocall", scatterid:it[5].baseName], it[3], it[4], it[5]]
+ }
+ }
+ .set { ch_vcf_scatter_in }
- VCFANNO (ch_vcf_in, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources)
+ GATK4_SELECTVARIANTS (ch_vcf_scatter_in)
- VCFANNO.out.vcf
- .map {meta, vcf ->
- def splitchannels = []
- for (int i=0; i< meta.upd_children.size(); i++) {
- upd_sample = meta.upd_children[i]
- new_meta = meta + [upd_child:upd_sample]
- splitchannels.add([new_meta,vcf])
- }
- return splitchannels
- }
- .flatten()
- .buffer (size: 2)
- .set { ch_upd_in }
+ GATK4_SELECTVARIANTS.out.vcf
+ .join(GATK4_SELECTVARIANTS.out.tbi)
+ .combine(ch_vcfanno_extra)
+ .set { ch_vcfanno_in }
- UPD_SITES(ch_upd_in)
- UPD_REGIONS(ch_upd_in)
- CHROMOGRAPH_SITES([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_SITES.out.bed)
- CHROMOGRAPH_REGIONS([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_REGIONS.out.bed, [[],[]])
+ VCFANNO (ch_vcfanno_in, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources)
ZIP_TABIX_VCFANNO (VCFANNO.out.vcf)
- //rhocall_viz
- ANNOTATE_RHOCALLVIZ(ZIP_TABIX_VCFANNO.out.gz_tbi, ch_samples, ch_genome_chrsizes)
-
BCFTOOLS_VIEW(ZIP_TABIX_VCFANNO.out.gz_tbi, [], [], []) // filter on frequencies
- TABIX_BCFTOOLS_VIEW (BCFTOOLS_VIEW.out.vcf)
-
- BCFTOOLS_VIEW.out.vcf
- .join(TABIX_BCFTOOLS_VIEW.out.tbi, failOnMismatch:true, failOnDuplicate:true)
- .collect()
- .combine(ch_split_intervals)
- .map {
- meta, vcf, tbi, interval ->
- return [meta + [scatterid:interval.baseName, prefix: vcf.simpleName], vcf, tbi, interval]
- }
- .set { ch_vcf_scatter_in }
-
- GATK4_SELECTVARIANTS (ch_vcf_scatter_in)
-
// Annotating with CADD
if (params.cadd_resources != null) {
+ TABIX_BCFTOOLS_VIEW (BCFTOOLS_VIEW.out.vcf)
+
+ BCFTOOLS_VIEW.out.vcf
+ .join(TABIX_BCFTOOLS_VIEW.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+ .set { ch_cadd_in }
+
ANNOTATE_CADD (
- GATK4_SELECTVARIANTS.out.vcf,
- GATK4_SELECTVARIANTS.out.tbi,
+ ch_cadd_in,
ch_cadd_header,
ch_cadd_resources
)
ch_cadd_vcf = ANNOTATE_CADD.out.vcf
ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
+ ch_versions = ch_versions.mix(TABIX_BCFTOOLS_VIEW.out.versions)
}
// If CADD is run, pick CADD output as input for VEP else pass selectvariants output to VEP.
- GATK4_SELECTVARIANTS.out.vcf
+ BCFTOOLS_VIEW.out.vcf
.join(ch_cadd_vcf, remainder: true) // If CADD is not run then the third element in this channel will be `null`
.branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,cadd.vcf], else [[meta],selvar.vcf,null]
selvar: it[2].equals(null)
- return [it[0], it[1]]
+ return [it[0] + [prefix: it[0].prefix + "_filter"], it[1]]
cadd: !(it[2].equals(null))
- return [it[0] + [prefix: it[0].prefix + "_cadd"], it[2]]
+ return [it[0] + [prefix: it[0].prefix + "_filter_cadd"], it[2]]
}
.set { ch_for_mix }
@@ -126,7 +112,6 @@ workflow ANNOTATE_GENOME_SNVS {
.map { meta, vcf -> return [meta, vcf, []] }
.set { ch_vep_in }
-
// Annotating with ensembl Vep
ENSEMBLVEP_SNV(
ch_vep_in,
@@ -156,14 +141,37 @@ workflow ANNOTATE_GENOME_SNVS {
BCFTOOLS_CONCAT (ch_concat_in)
+ BCFTOOLS_CONCAT.out.vcf
+ .map {meta, vcf ->
+ def splitchannels = []
+ for (int i=0; i< meta.upd_children.size(); i++) {
+ upd_sample = meta.upd_children[i]
+ new_meta = meta + [upd_child:upd_sample, prefix: meta.prefix + "_vcfanno"]
+ splitchannels.add([new_meta,vcf])
+ }
+ return splitchannels
+ }
+ .flatten()
+ .buffer (size: 2)
+ .set { ch_upd_in }
+
+ UPD_SITES(ch_upd_in)
+ UPD_REGIONS(ch_upd_in)
+ CHROMOGRAPH_SITES([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_SITES.out.bed)
+ CHROMOGRAPH_REGIONS([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_REGIONS.out.bed, [[],[]])
+
+
BCFTOOLS_CONCAT.out.vcf
.map { meta, vcf -> [meta - meta.subMap('prefix'), vcf] }
.set { ch_concat_out }
TABIX_BCFTOOLS_CONCAT (ch_concat_out)
- ch_vep_ann = ch_concat_out
- ch_vep_index = TABIX_BCFTOOLS_CONCAT.out.tbi
+ ch_vep_ann = ch_concat_out
+ ch_vep_index = TABIX_BCFTOOLS_CONCAT.out.tbi
+ ch_vep_ann_index = ch_concat_out.join(TABIX_BCFTOOLS_CONCAT.out.tbi)
+ //rhocall_viz
+ ANNOTATE_RHOCALLVIZ(ch_vep_ann_index, ch_samples, ch_genome_chrsizes)
ch_versions = ch_versions.mix(BCFTOOLS_ROH.out.versions)
ch_versions = ch_versions.mix(RHOCALL_ANNOTATE.out.versions)
@@ -175,7 +183,6 @@ workflow ANNOTATE_GENOME_SNVS {
ch_versions = ch_versions.mix(CHROMOGRAPH_REGIONS.out.versions)
ch_versions = ch_versions.mix(ZIP_TABIX_VCFANNO.out.versions)
ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions)
- ch_versions = ch_versions.mix(TABIX_BCFTOOLS_VIEW.out.versions)
ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions.first())
ch_versions = ch_versions.mix(ENSEMBLVEP_SNV.out.versions.first())
ch_versions = ch_versions.mix(TABIX_VEP.out.versions.first())
diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf
index 02554461..d2a82879 100644
--- a/subworkflows/local/annotate_mt_snvs.nf
+++ b/subworkflows/local/annotate_mt_snvs.nf
@@ -19,7 +19,9 @@ workflow ANNOTATE_MT_SNVS {
ch_cadd_header // channel: [mandatory] [ path(txt) ]
ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
+ ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index)] ]
+ ch_vcfanno_lua // channel: [mandatory] [ path(lua) ]
+ ch_vcfanno_resources // channel: [mandatory] [ [path(vcf1),path(index1),...,path(vcfn),path(indexn)] ]
ch_vcfanno_toml // channel: [mandatory] [ path(toml) ]
val_vep_genome // string: [mandatory] GRCh37 or GRCh38
val_vep_cache_version // string: [mandatory] 107
@@ -44,10 +46,11 @@ workflow ANNOTATE_MT_SNVS {
// Vcfanno
ZIP_TABIX_HMTNOTE_MT.out.gz_tbi
- .map { meta, vcf, tbi -> return [meta + [prefix: meta.prefix + "_vcfanno"], vcf, tbi, []]}
+ .combine(ch_vcfanno_extra)
+ .map { meta, vcf, tbi, resources -> return [meta + [prefix: meta.prefix + "_vcfanno"], vcf, tbi, resources]}
.set { ch_in_vcfanno }
- VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources)
+ VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources)
ZIP_TABIX_VCFANNO_MT(VCFANNO_MT.out.vcf)
ch_vcfanno_vcf = ZIP_TABIX_VCFANNO_MT.out.gz_tbi.map{meta, vcf, tbi -> return [meta, vcf]}
@@ -56,8 +59,7 @@ workflow ANNOTATE_MT_SNVS {
// Annotating with CADD
if (params.cadd_resources != null) {
ANNOTATE_CADD (
- ch_vcfanno_vcf,
- ch_vcfanno_tbi,
+ ZIP_TABIX_VCFANNO_MT.out.gz_tbi,
ch_cadd_header,
ch_cadd_resources
)
diff --git a/subworkflows/local/annotate_repeat_expansions.nf b/subworkflows/local/annotate_repeat_expansions.nf
new file mode 100644
index 00000000..5f8b8ea0
--- /dev/null
+++ b/subworkflows/local/annotate_repeat_expansions.nf
@@ -0,0 +1,34 @@
+//
+// Annotate repeat expansions
+//
+
+include { BCFTOOLS_VIEW as COMPRESS_STRANGER } from '../../modules/nf-core/bcftools/view/main'
+include { STRANGER } from '../../modules/nf-core/stranger/main'
+include { TABIX_TABIX as INDEX_STRANGER } from '../../modules/nf-core/tabix/tabix/main'
+
+workflow ANNOTATE_REPEAT_EXPANSIONS {
+ take:
+ ch_variant_catalog // channel: [mandatory] [ path(variant_catalog.json) ]
+ ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ // Annotate, compress and index
+ STRANGER ( ch_vcf, ch_variant_catalog )
+ COMPRESS_STRANGER (
+ STRANGER.out.vcf.map{ meta, vcf -> [meta, vcf, [] ]},
+ [], [], []
+ )
+ INDEX_STRANGER ( COMPRESS_STRANGER.out.vcf )
+
+ ch_vcf_idx = COMPRESS_STRANGER.out.vcf.join(INDEX_STRANGER.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+
+ ch_versions = ch_versions.mix(STRANGER.out.versions.first())
+ ch_versions = ch_versions.mix(COMPRESS_STRANGER.out.versions.first())
+ ch_versions = ch_versions.mix(INDEX_STRANGER.out.versions.first())
+
+emit:
+ vcf = ch_vcf_idx // channel: [ val(meta), path(vcf), path(tbi) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/annotation/annotate_cadd.nf b/subworkflows/local/annotation/annotate_cadd.nf
index e471dd21..12c596a7 100644
--- a/subworkflows/local/annotation/annotate_cadd.nf
+++ b/subworkflows/local/annotation/annotate_cadd.nf
@@ -12,15 +12,14 @@ include { TABIX_TABIX as TABIX_VIEW } from '../../../modules/nf-core/tabix/t
workflow ANNOTATE_CADD {
take:
- ch_vcf // channel: [mandatory] [ val(meta), path(vcfs) ]
- ch_index // channel: [mandatory] [ val(meta), path(tbis) ]
+ ch_vcf // channel: [mandatory] [ val(meta), path(vcfs), path(idx) ]
ch_header // channel: [mandatory] [ path(txt) ]
ch_cadd_resources // channel: [mandatory] [ path(dir) ]
main:
ch_versions = Channel.empty()
- BCFTOOLS_VIEW(ch_vcf.join(ch_index), [], [], [])
+ BCFTOOLS_VIEW(ch_vcf, [], [], [])
TABIX_VIEW(BCFTOOLS_VIEW.out.vcf)
@@ -29,7 +28,6 @@ workflow ANNOTATE_CADD {
TABIX_CADD(CADD.out.tsv)
ch_vcf
- .join(ch_index)
.join(CADD.out.tsv)
.join(TABIX_CADD.out.tbi)
.combine(ch_header)
diff --git a/subworkflows/local/call_repeat_expansions.nf b/subworkflows/local/call_repeat_expansions.nf
index 130b7a5b..c45a92ff 100644
--- a/subworkflows/local/call_repeat_expansions.nf
+++ b/subworkflows/local/call_repeat_expansions.nf
@@ -4,14 +4,11 @@
include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_EXP } from '../../modules/nf-core/bcftools/norm/main'
include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_EXP } from '../../modules/nf-core/bcftools/reheader/main'
-include { BCFTOOLS_VIEW as COMPRESS_STRANGER } from '../../modules/nf-core/bcftools/view/main'
include { EXPANSIONHUNTER } from '../../modules/nf-core/expansionhunter/main'
include { PICARD_RENAMESAMPLEINVCF as RENAMESAMPLE_EXP } from '../../modules/nf-core/picard/renamesampleinvcf/main'
-include { STRANGER } from '../../modules/nf-core/stranger/main'
include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { SVDB_MERGE as SVDB_MERGE_REPEATS } from '../../modules/nf-core/svdb/merge/main'
-include { TABIX_TABIX as INDEX_STRANGER } from '../../modules/nf-core/tabix/tabix/main'
include { TABIX_TABIX as TABIX_EXP_RENAME } from '../../modules/nf-core/tabix/tabix/main'
workflow CALL_REPEAT_EXPANSIONS {
@@ -63,29 +60,16 @@ workflow CALL_REPEAT_EXPANSIONS {
SVDB_MERGE_REPEATS ( ch_svdb_merge_input, [] )
- // Annotate, compress and index
- STRANGER ( SVDB_MERGE_REPEATS.out.vcf, ch_variant_catalog )
- COMPRESS_STRANGER (
- STRANGER.out.vcf.map{ meta, vcf -> [meta, vcf, [] ]},
- [], [], []
- )
- INDEX_STRANGER ( COMPRESS_STRANGER.out.vcf )
-
- ch_vcf_idx = COMPRESS_STRANGER.out.vcf.join(INDEX_STRANGER.out.tbi, failOnMismatch:true, failOnDuplicate:true)
-
ch_versions = ch_versions.mix(EXPANSIONHUNTER.out.versions.first())
ch_versions = ch_versions.mix(BCFTOOLS_REHEADER_EXP.out.versions.first())
ch_versions = ch_versions.mix(RENAMESAMPLE_EXP.out.versions.first() )
ch_versions = ch_versions.mix(TABIX_EXP_RENAME.out.versions.first())
ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_EXP.out.versions.first())
ch_versions = ch_versions.mix(SVDB_MERGE_REPEATS.out.versions.first())
- ch_versions = ch_versions.mix(STRANGER.out.versions.first())
- ch_versions = ch_versions.mix(COMPRESS_STRANGER.out.versions.first())
- ch_versions = ch_versions.mix(INDEX_STRANGER.out.versions.first())
ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
emit:
- vcf = ch_vcf_idx // channel: [ val(meta), path(vcf), path(tbi) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
+ vcf = SVDB_MERGE_REPEATS.out.vcf // channel: [ val(meta), path(vcf) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
}
diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
index 48bc500a..5d0b2ce9 100644
--- a/subworkflows/local/call_snv.nf
+++ b/subworkflows/local/call_snv.nf
@@ -28,6 +28,7 @@ workflow CALL_SNV {
ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ]
ch_call_interval // channel: [mandatory] [ path(intervals) ]
ch_ml_model // channel: [mandatory] [ path(model) ]
+ ch_par_bed // channel: [optional] [ val(meta), path(bed) ]
ch_case_info // channel: [mandatory] [ val(case_info) ]
ch_foundin_header // channel: [mandatory] [ path(header) ]
ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ]
@@ -50,6 +51,7 @@ workflow CALL_SNV {
ch_genome_bam_bai,
ch_genome_fasta,
ch_genome_fai,
+ ch_par_bed,
ch_case_info,
ch_foundin_header,
ch_genome_chrsizes
diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf
index 76f40af5..65f2f9ff 100644
--- a/subworkflows/local/call_structural_variants.nf
+++ b/subworkflows/local/call_structural_variants.nf
@@ -38,10 +38,19 @@ workflow CALL_STRUCTURAL_VARIANTS {
.collect{it[1]}
.set{ manta_vcf }
- CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info)
- .vcf
- .collect{it[1]}
- .set { tiddit_vcf }
+ if (params.analysis_type.equals("wgs")) {
+ CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info)
+ .vcf
+ .collect{it[1]}
+ .set { tiddit_vcf }
+ ch_versions = ch_versions.mix(CALL_SV_TIDDIT.out.versions)
+
+ CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info)
+ .vcf
+ .collect{it[1]}
+ .set { cnvnator_vcf }
+ ch_versions = ch_versions.mix(CALL_SV_CNVNATOR.out.versions)
+ }
if (!params.skip_germlinecnvcaller) {
CALL_SV_GERMLINECNVCALLER (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_readcount_intervals, ch_genome_dictionary, ch_ploidy_model, ch_gcnvcaller_model)
@@ -52,11 +61,6 @@ workflow CALL_STRUCTURAL_VARIANTS {
ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions)
}
- CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info)
- .vcf
- .collect{it[1]}
- .set { cnvnator_vcf }
-
if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
ch_versions = ch_versions.mix(CALL_SV_MT.out.versions)
@@ -64,16 +68,27 @@ workflow CALL_STRUCTURAL_VARIANTS {
//merge
if (params.skip_germlinecnvcaller) {
+ if (params.analysis_type.equals("wgs")) {
+ tiddit_vcf
+ .combine(manta_vcf)
+ .combine(cnvnator_vcf)
+ .toList()
+ .set { vcf_list }
+ } else {
+ manta_vcf
+ .toList()
+ .set { vcf_list }
+ }
+ } else if (params.analysis_type.equals("wgs")){
tiddit_vcf
.combine(manta_vcf)
+ .combine(gcnvcaller_vcf)
.combine(cnvnator_vcf)
.toList()
.set { vcf_list }
} else {
- tiddit_vcf
- .combine(manta_vcf)
+ manta_vcf
.combine(gcnvcaller_vcf)
- .combine(cnvnator_vcf)
.toList()
.set { vcf_list }
}
@@ -86,9 +101,7 @@ workflow CALL_STRUCTURAL_VARIANTS {
TABIX_TABIX (SVDB_MERGE.out.vcf)
- ch_versions = ch_versions.mix(CALL_SV_CNVNATOR.out.versions)
ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
- ch_versions = ch_versions.mix(CALL_SV_TIDDIT.out.versions)
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
ch_versions = ch_versions.mix(SVDB_MERGE.out.versions)
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index 2bd4b6dd..8c39c614 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -7,14 +7,11 @@ include { BWA_INDEX as BWA_INDEX_MT_SHIFT } from '../../modul
include { BWAMEM2_INDEX as BWAMEM2_INDEX_GENOME } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT_SHIFT } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEME_INDEX as BWAMEME_INDEX_GENOME } from '../../modules/nf-core/bwameme/index/main'
-include { BWAMEME_INDEX as BWAMEME_INDEX_MT_SHIFT } from '../../modules/nf-core/bwameme/index/main'
include { CAT_CAT as CAT_CAT_BAIT } from '../../modules/nf-core/cat/cat/main'
include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modules/nf-core/gatk4/bedtointervallist/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT_SHIFT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_INTERVALLISTTOOLS as GATK_ILT } from '../../modules/nf-core/gatk4/intervallisttools/main'
-include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WGS } from '../../modules/nf-core/gatk4/preprocessintervals/main.nf'
-include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WES } from '../../modules/nf-core/gatk4/preprocessintervals/main.nf'
include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main'
include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes'
include { RTGTOOLS_FORMAT } from '../../modules/nf-core/rtgtools/format/main'
@@ -24,6 +21,8 @@ include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modul
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/nf-core/sentieon/bwaindex/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/nf-core/sentieon/bwaindex/main'
include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main'
+include { TABIX_BGZIPTABIX as TABIX_BGZIPINDEX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/bgziptabix/main'
+include { TABIX_TABIX as TABIX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/tabix/main'
include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/tabix/tabix/main'
include { TABIX_TABIX as TABIX_GNOMAD_AF } from '../../modules/nf-core/tabix/tabix/main'
include { TABIX_TABIX as TABIX_PT } from '../../modules/nf-core/tabix/tabix/main'
@@ -31,25 +30,31 @@ include { UNTAR as UNTAR_VEP_CACHE } from '../../modul
workflow PREPARE_REFERENCES {
take:
- ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
- ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
- ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ]
- ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ]
- ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ]
- ch_target_bed // channel: [mandatory for WES] [ path(bed) ]
- ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ]
+ ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ]
+ ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ]
+ ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ]
+ ch_target_bed // channel: [mandatory for WES] [ path(bed) ]
+ ch_vcfanno_extra_unprocessed // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ]
main:
- ch_versions = Channel.empty()
- ch_tbi = Channel.empty()
- ch_bgzip_tbi = Channel.empty()
- ch_bwa = Channel.empty()
- ch_sentieonbwa = Channel.empty()
+ ch_versions = Channel.empty()
+ ch_tbi = Channel.empty()
+ ch_bgzip_tbi = Channel.empty()
+ ch_bwa = Channel.empty()
+ ch_sentieonbwa = Channel.empty()
+ ch_vcfanno_extra = Channel.empty()
+ ch_vcfanno_bgzip = Channel.empty()
+ ch_vcfanno_index = Channel.empty()
// Genome indices
SAMTOOLS_FAIDX_GENOME(ch_genome_fasta, [[],[]])
GATK_SD(ch_genome_fasta)
- ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect()
+ ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect()
+ ch_dict = Channel.empty().mix(ch_genome_dictionary, GATK_SD.out.dict).collect()
GET_CHROM_SIZES( ch_fai )
// Genome alignment indices
@@ -67,7 +72,6 @@ workflow PREPARE_REFERENCES {
// MT alignment indices
BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
- BWAMEME_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
BWA_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
ch_bwa_mtshift = Channel.empty().mix(SENTIEON_BWAINDEX_MT_SHIFT.out.index, BWA_INDEX_MT_SHIFT.out.index).collect()
@@ -87,9 +91,30 @@ workflow PREPARE_REFERENCES {
TABIX_GNOMAD_AF(ch_gnomad_af_tab)
TABIX_PT(ch_target_bed).tbi.set { ch_tbi }
TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi }
+ ch_vcfanno_extra_unprocessed
+ .branch { it ->
+ bgzipindex: !it[1].toString().endsWith(".gz")
+ index: it[1].toString().endsWith(".gz")
+ }
+ .set { ch_vcfanno_tabix_in }
+
+ TABIX_VCFANNOEXTRA(ch_vcfanno_tabix_in.index).tbi
+ .join(ch_vcfanno_tabix_in.index)
+ .map { meta, tbi, vcf -> return [[vcf,tbi]]}
+ .set {ch_vcfanno_index}
+ TABIX_BGZIPINDEX_VCFANNOEXTRA(ch_vcfanno_tabix_in.bgzipindex)
+ Channel.empty()
+ .mix(TABIX_BGZIPINDEX_VCFANNOEXTRA.out.gz_tbi, TABIX_BGZIPINDEX_VCFANNOEXTRA.out.gz_csi)
+ .map { meta, vcf, index -> return [[vcf,index]] }
+ .set {ch_vcfanno_bgzip}
+
+ Channel.empty()
+ .mix(ch_vcfanno_bgzip, ch_vcfanno_index)
+ .collect()
+ .set{ch_vcfanno_extra}
// Generate bait and target intervals
- GATK_BILT(ch_target_bed, GATK_SD.out.dict).interval_list
+ GATK_BILT(ch_target_bed, ch_dict).interval_list
GATK_ILT(GATK_BILT.out.interval_list)
GATK_ILT.out.interval_list
.collect{ it[1] }
@@ -101,10 +126,6 @@ workflow PREPARE_REFERENCES {
CAT_CAT_BAIT ( ch_bait_intervals_cat_in )
UNTAR_VEP_CACHE (ch_vep_cache)
- //cnvcalling intervals
- GATK_PREPROCESS_WGS (ch_genome_fasta, ch_fai, GATK_SD.out.dict, [[],[]], [[],[]]).set {ch_preprocwgs}
- GATK_PREPROCESS_WES (ch_genome_fasta, ch_fai, GATK_SD.out.dict, GATK_BILT.out.interval_list, [[],[]]).set {ch_preprocwes}
-
// RTG tools
ch_genome_fasta.map { meta, fasta -> return [meta, fasta, [], [] ] }
.set {ch_rtgformat_in}
@@ -122,20 +143,19 @@ workflow PREPARE_REFERENCES {
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(GATK_SD_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(GATK_SHIFTFASTA.out.versions)
- ch_versions = ch_versions.mix(BWAMEME_INDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(BWA_INDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(TABIX_GNOMAD_AF.out.versions)
ch_versions = ch_versions.mix(TABIX_PT.out.versions)
ch_versions = ch_versions.mix(TABIX_PBT.out.versions)
+ ch_versions = ch_versions.mix(TABIX_BGZIPINDEX_VCFANNOEXTRA.out.versions)
+ ch_versions = ch_versions.mix(TABIX_VCFANNOEXTRA.out.versions)
ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions)
ch_versions = ch_versions.mix(GATK_BILT.out.versions)
ch_versions = ch_versions.mix(GATK_ILT.out.versions)
ch_versions = ch_versions.mix(CAT_CAT_BAIT.out.versions)
ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions)
- ch_versions = ch_versions.mix(GATK_PREPROCESS_WGS.out.versions)
- ch_versions = ch_versions.mix(GATK_PREPROCESS_WES.out.versions)
ch_versions = ch_versions.mix(RTGTOOLS_FORMAT.out.versions)
emit:
@@ -144,9 +164,7 @@ workflow PREPARE_REFERENCES {
genome_bwameme_index = BWAMEME_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ]
genome_fai = ch_fai // channel: [ val(meta), path(fai) ]
- genome_dict = GATK_SD.out.dict.collect() // channel: [ path(dict) ]
- readcount_intervals = Channel.empty()
- .mix(ch_preprocwgs.interval_list,ch_preprocwes.interval_list)// channel: [ path(intervals) ]
+ genome_dict = ch_dict // channel: [ val(meta), path(dict) ]
sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val (meta), path(intervals) ]
mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ]
mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ]
@@ -156,11 +174,11 @@ workflow PREPARE_REFERENCES {
mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ]
mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ]
mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
- mtshift_bwameme_index = BWAMEME_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]
gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ]
+ vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ [path(vcf), path(tbi)] ]
bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ]
target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ]
vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ]
diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf
index 26c4fa75..2b08eebb 100644
--- a/subworkflows/local/qc_bam.nf
+++ b/subworkflows/local/qc_bam.nf
@@ -45,10 +45,8 @@ workflow QC_BAM {
PICARD_COLLECTHSMETRICS (ch_hsmetrics_in, ch_genome_fasta, ch_genome_fai, [[],[]])
- if (!params.skip_qualimap) {
- ch_qualimap = QUALIMAP_BAMQC (ch_bam, []).results
- ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first())
- }
+ ch_qualimap = QUALIMAP_BAMQC (ch_bam, []).results
+ ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first())
TIDDIT_COV (ch_bam, [[],[]]) // 2nd pos. arg is req. only for cram input
diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf
index 36c0cbaa..bafaf7ce 100644
--- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf
@@ -90,7 +90,7 @@ workflow PIPELINE_INITIALISATION {
.combine( ch_original_input )
.map { counts, meta, fastq1, fastq2 ->
new_meta = meta + [num_lanes:counts[meta.id],
- read_group:"\'@RG\\tID:"+ fastq1.toString().split('/')[-1] + "\\tPL:" + params.platform.toUpperCase() + "\\tSM:" + meta.id + "\'"]
+ read_group:"\'@RG\\tID:"+ fastq1.simpleName + "_" + meta.lane + "\\tPL:" + params.platform.toUpperCase() + "\\tSM:" + meta.id + "\'"]
if (!fastq2) {
return [ new_meta + [ single_end:true ], [ fastq1 ] ]
} else {
@@ -203,10 +203,11 @@ def genomeExistsError() {
def toolCitationText() {
def align_text = []
- def variant_call_text = []
- def repeat_call_text = []
+ def repeats_text = []
def snv_annotation_text = []
+ def snv_calls_text = []
def sv_annotation_text = []
+ def sv_calls_text = []
def mt_annotation_text = []
def qc_bam_text = []
def me_calls_text = []
@@ -215,54 +216,67 @@ def toolCitationText() {
def other_citation_text = []
align_text = [
- params.aligner.equals("bwa") ? "BWA (Li, 2013)," :"",
- params.aligner.equals("bwamem2") ? "BWA-MEM2 (Vasimuddin et al., 2019)," : "",
+ params.aligner.equals("bwa") ? "BWA (Li, 2013)," :"",
+ params.aligner.equals("bwamem2") ? "BWA-MEM2 (Vasimuddin et al., 2019)," : "",
+ params.aligner.equals("bwameme") ? "BWA-MEME (Jung et al., 2022)," : "",
params.aligner.equals("sentieon") ? "Sentieon DNASeq (Kendig et al., 2019)," : "",
- params.aligner.equals("sentieon") ? "Sentieon Tools (Freed et al., 2017)," : ""
+ params.aligner.equals("sentieon") ? "Sentieon Tools (Freed et al., 2017)," : ""
]
- variant_call_text = [
- params.variant_caller.equals("deepvariant") ? "DeepVariant (Poplin et al., 2018)," : "",
- params.variant_caller.equals("sentieon") ? "Sentieon DNAscope (Freed et al., 2022)," : "",
- params.skip_haplocheck ? "" : "Haplocheck (Weissensteiner et al., 2021),",
- "CNVnator (Abyzov et al., 2011),",
- "TIDDIT (Eisfeldt et al., 2017),",
- "Manta (Chen et al., 2016),",
- "GLnexus (Yun et al., 2021),",
- params.skip_eklipse ? "" : "eKLIPse (Goudenge et al., 2019),",
- ]
- repeat_call_text = [
- "ExpansionHunter (Dolzhenko et al., 2019),",
- "stranger (Nilsson & Magnusson, 2021),"
+ repeats_text = [
+ (!params.skip_repeat_calling && params.analysis_type.equals("wgs")) ? "ExpansionHunter (Dolzhenko et al., 2019)," : "",
+ (!params.skip_repeat_annotation && params.analysis_type.equals("wgs")) ? "stranger (Nilsson & Magnusson, 2021)," : ""
]
if (!params.skip_snv_annotation) {
snv_annotation_text = [
"CADD (Rentzsch et al., 2019, 2021),",
"Vcfanno (Pedersen et al., 2016),",
"VEP (McLaren et al., 2016),",
- "Genmod (Magnusson et al., 2018),",
+ "Genmod (Magnusson et al., 2018),"
+ ]
+ }
+ if (!params.skip_snv_calling) {
+ snv_calls_text = [
+ params.variant_caller.equals("deepvariant") ? "DeepVariant (Poplin et al., 2018)," : "",
+ params.variant_caller.equals("sentieon") ? "Sentieon DNAscope (Freed et al., 2022)," : "",
+ params.run_mt_for_wes ? "Haplocheck (Weissensteiner et al., 2021)," : "",
+ "GLnexus (Yun et al., 2021),"
]
}
if (!params.skip_sv_annotation) {
sv_annotation_text = [
"SVDB (Eisfeldt et al., 2017),",
"VEP (McLaren et al., 2016),",
- "Genmod (Magnusson et al., 2018),",
+ "Genmod (Magnusson et al., 2018),"
+ ]
+ }
+ if (!params.skip_sv_calling) {
+ sv_calls_text = [
+ params.analysis_type.equals("wgs") ? "CNVnator (Abyzov et al., 2011)," : "",
+ params.analysis_type.equals("wgs") ? "TIDDIT (Eisfeldt et al., 2017)," : "",
+ "Manta (Chen et al., 2016),",
+ params.analysis_type.equals("wgs") ? "eKLIPse (Goudenge et al., 2019)," : ""
]
}
- if (!params.skip_mt_annotation) {
+ if (!params.skip_mt_annotation && (params.analysis_type.equals("wgs") || params.run_mt_for_wes)) {
mt_annotation_text = [
"CADD (Rentzsch et al., 2019, 2021),",
"VEP (McLaren et al., 2016),",
"Vcfanno (Pedersen et al., 2016),",
"Hmtnote (Preste et al., 2019),",
"HaploGrep2 (Weissensteiner et al., 2016),",
- "Genmod (Magnusson et al., 2018),",
+ "Genmod (Magnusson et al., 2018),"
]
}
- if (!params.skip_me_annotation) {
+ if (!params.skip_me_annotation && params.analysis_type.equals("wgs")) {
me_annotation_text = [
"VEP (McLaren et al., 2016),",
+ "SVDB (Eisfeldt et al., 2017),"
+ ]
+ }
+ if (!params.skip_me_calling && params.analysis_type.equals("wgs")) {
+ me_calls_text = [
"SVDB (Eisfeldt et al., 2017),",
+ "RetroSeq (Keane et al., 2013),"
]
}
qc_bam_text = [
@@ -270,33 +284,30 @@ def toolCitationText() {
"Qualimap (Okonechnikov et al., 2016),",
"TIDDIT (Eisfeldt et al., 2017),",
"UCSC Bigwig and Bigbed (Kent et al., 2010),",
- "Mosdepth (Pedersen & Quinlan, 2018),",
- ]
- me_calls_text = [
- "SVDB (Eisfeldt et al., 2017),",
- "RetroSeq (Keane et al., 2013),",
+ "Mosdepth (Pedersen & Quinlan, 2018),"
]
preprocessing_text = [
- params.skip_fastqc ? "" : "FastQC (Andrews 2010),",
- params.skip_fastp ? "" : "Fastp (Chen, 2023),",
+ "FastQC (Andrews 2010),",
+ params.skip_fastp ? "" : "Fastp (Chen, 2023),"
]
other_citation_text = [
"BCFtools (Danecek et al., 2021),",
"GATK (McKenna et al., 2010),",
"MultiQC (Ewels et al. 2016),",
- params.skip_peddy ? "" : "Peddy (Pedersen & Quinlan, 2017),",
+ params.skip_peddy ? "" : "Peddy (Pedersen & Quinlan, 2017),",
params.run_rtgvcfeval ? "RTG Tools (Cleary et al., 2015)," : "",
"SAMtools (Li et al., 2009),",
- "SMNCopyNumberCaller (Chen et al., 2020),",
+ (!params.skip_smncopynumbercaller && params.analysis_type.equals("wgs")) ? "SMNCopyNumberCaller (Chen et al., 2020)," : "",
"Tabix (Li, 2011)",
"."
]
def concat_text = align_text +
- variant_call_text +
- repeat_call_text +
+ repeats_text +
snv_annotation_text +
+ snv_calls_text +
sv_annotation_text +
+ sv_calls_text +
mt_annotation_text +
qc_bam_text +
me_calls_text +
@@ -311,10 +322,11 @@ def toolCitationText() {
def toolBibliographyText() {
def align_text = []
- def variant_call_text = []
- def repeat_call_text = []
+ def repeats_text = []
def snv_annotation_text = []
+ def snv_calls_text = []
def sv_annotation_text = []
+ def sv_calls_text = []
def mt_annotation_text = []
def qc_bam_text = []
def me_calls_text = []
@@ -325,22 +337,13 @@ def toolBibliographyText() {
align_text = [
params.aligner.equals("bwa") ? "Li, H. (2013). Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM (arXiv:1303.3997). arXiv. http://arxiv.org/abs/1303.3997" :"",
params.aligner.equals("bwamem2") ? "Vasimuddin, Md., Misra, S., Li, H., & Aluru, S. (2019). Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), 314–324. https://doi.org/10.1109/IPDPS.2019.00041" : "",
+ params.aligner.equals("bwameme") ? "Jung Y, Han D. BWA-MEME: BWA-MEM emulated with a machine learning approach. Bioinformatics. 2022;38(9):2404-2413. doi:10.1093/bioinformatics/btac137" : "",
params.aligner.equals("sentieon") ? "Kendig, K. I., Baheti, S., Bockol, M. A., Drucker, T. M., Hart, S. N., Heldenbrand, J. R., Hernaez, M., Hudson, M. E., Kalmbach, M. T., Klee, E. W., Mattson, N. R., Ross, C. A., Taschuk, M., Wieben, E. D., Wiepert, M., Wildman, D. E., & Mainzer, L. S. (2019). Sentieon DNASeq Variant Calling Workflow Demonstrates Strong Computational Performance and Accuracy. Frontiers in Genetics, 10, 736. https://doi.org/10.3389/fgene.2019.00736" : "",
params.aligner.equals("sentieon") ? "Freed, D., Aldana, R., Weber, J. A., & Edwards, J. S. (2017). The Sentieon Genomics Tools—A fast and accurate solution to variant calling from next-generation sequence data (p. 115717). bioRxiv. https://doi.org/10.1101/115717" : ""
]
- variant_call_text = [
- params.variant_caller.equals("deepvariant") ? "Poplin, R., Chang, P.-C., Alexander, D., Schwartz, S., Colthurst, T., Ku, A., Newburger, D., Dijamco, J., Nguyen, N., Afshar, P. T., Gross, S. S., Dorfman, L., McLean, C. Y., & DePristo, M. A. (2018). A universal SNP and small-indel variant caller using deep neural networks. Nature Biotechnology, 36(10), 983–987. https://doi.org/10.1038/nbt.4235" : "",
- params.variant_caller.equals("sentieon") ? "Freed, D., Pan, R., Chen, H., Li, Z., Hu, J., & Aldana, R. (2022). DNAscope: High accuracy small variant calling using machine learning [Preprint]. Bioinformatics. https://doi.org/10.1101/2022.05.20.492556" : "",
- params.skip_haplocheck ? "" : "Weissensteiner, H., Forer, L., Fendt, L., Kheirkhah, A., Salas, A., Kronenberg, F., & Schoenherr, S. (2021). Contamination detection in sequencing studies using the mitochondrial phylogeny. Genome Research, 31(2), 309–316. https://doi.org/10.1101/gr.256545.119",
- "Abyzov, A., Urban, A. E., Snyder, M., & Gerstein, M. (2011). CNVnator: An approach to discover, genotype, and characterize typical and atypical CNVs from family and population genome sequencing. Genome Research, 21(6), 974–984. https://doi.org/10.1101/gr.114876.110",
- "Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2",
- "Chen, X., Schulz-Trieglaff, O., Shaw, R., Barnes, B., Schlesinger, F., Källberg, M., Cox, A. J., Kruglyak, S., & Saunders, C. T. (2016). Manta: Rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics, 32(8), 1220–1222. https://doi.org/10.1093/bioinformatics/btv710",
- "Yun, T., Li, H., Chang, P.-C., Lin, M. F., Carroll, A., & McLean, C. Y. (2021). Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Bioinformatics, 36(24), 5582–5589. https://doi.org/10.1093/bioinformatics/btaa1081",
- params.skip_eklipse ? "" : "Goudenège, D., Bris, C., Hoffmann, V., Desquiret-Dumas, V., Jardel, C., Rucheton, B., Bannwarth, S., Paquis-Flucklinger, V., Lebre, A. S., Colin, E., Amati-Bonneau, P., Bonneau, D., Reynier, P., Lenaers, G., & Procaccio, V. (2019). eKLIPse: A sensitive tool for the detection and quantification of mitochondrial DNA deletions from next-generation sequencing data. Genetics in Medicine, 21(6), 1407–1416. https://doi.org/10.1038/s41436-018-0350-8",
- ]
- repeat_call_text = [
- "Dolzhenko, E., Deshpande, V., Schlesinger, F., Krusche, P., Petrovski, R., Chen, S., Emig-Agius, D., Gross, A., Narzisi, G., Bowman, B., Scheffler, K., van Vugt, J. J. F. A., French, C., Sanchis-Juan, A., Ibáñez, K., Tucci, A., Lajoie, B. R., Veldink, J. H., Raymond, F. L., … Eberle, M. A. (2019). ExpansionHunter: A sequence-graph-based tool to analyze variation in short tandem repeat regions. Bioinformatics, 35(22), 4754–4756. https://doi.org/10.1093/bioinformatics/btz431",
- "Nilsson, D., & Magnusson, M. (2021). Moonso/stranger v0.7.1 (v0.7.1) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.4548873"
+ repeats_text = [
+ (!params.skip_repeat_calling && params.analysis_type.equals("wgs") ) ? "Dolzhenko, E., Deshpande, V., Schlesinger, F., Krusche, P., Petrovski, R., Chen, S., Emig-Agius, D., Gross, A., Narzisi, G., Bowman, B., Scheffler, K., van Vugt, J. J. F. A., French, C., Sanchis-Juan, A., Ibáñez, K., Tucci, A., Lajoie, B. R., Veldink, J. H., Raymond, F. L., … Eberle, M. A. (2019). ExpansionHunter: A sequence-graph-based tool to analyze variation in short tandem repeat regions. Bioinformatics, 35(22), 4754–4756. https://doi.org/10.1093/bioinformatics/btz431" : "",
+ (!params.skip_repeat_annotation && params.analysis_type.equals("wgs") ) ? "Nilsson, D., & Magnusson, M. (2021). Moonso/stranger v0.7.1 (v0.7.1) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.4548873" : ""
]
if (!params.skip_snv_annotation) {
snv_annotation_text = [
@@ -348,17 +351,35 @@ def toolBibliographyText() {
"Rentzsch, P., Witten, D., Cooper, G. M., Shendure, J., & Kircher, M. (2019). CADD: Predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research, 47(D1), D886–D894. https://doi.org/10.1093/nar/gky1016",
"Pedersen, B. S., Layer, R. M., & Quinlan, A. R. (2016). Vcfanno: Fast, flexible annotation of genetic variants. Genome Biology, 17(1), 118. https://doi.org/10.1186/s13059-016-0973-5",
"McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4",
- "Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142",
+ "Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142"
]
}
+ if (!params.skip_snv_calling) {
+ snv_calls_text = [
+ params.variant_caller.equals("deepvariant") ? "Poplin, R., Chang, P.-C., Alexander, D., Schwartz, S., Colthurst, T., Ku, A., Newburger, D., Dijamco, J., Nguyen, N., Afshar, P. T., Gross, S. S., Dorfman, L., McLean, C. Y., & DePristo, M. A. (2018). A universal SNP and small-indel variant caller using deep neural networks. Nature Biotechnology, 36(10), 983–987. https://doi.org/10.1038/nbt.4235" : "",
+ params.variant_caller.equals("sentieon") ? "Freed, D., Pan, R., Chen, H., Li, Z., Hu, J., & Aldana, R. (2022). DNAscope: High accuracy small variant calling using machine learning [Preprint]. Bioinformatics. https://doi.org/10.1101/2022.05.20.492556" : "",
+ params.run_mt_for_wes ? "Weissensteiner, H., Forer, L., Fendt, L., Kheirkhah, A., Salas, A., Kronenberg, F., & Schoenherr, S. (2021). Contamination detection in sequencing studies using the mitochondrial phylogeny. Genome Research, 31(2), 309–316. https://doi.org/10.1101/gr.256545.119" : "",
+ "Yun, T., Li, H., Chang, P.-C., Lin, M. F., Carroll, A., & McLean, C. Y. (2021). Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Bioinformatics, 36(24), 5582–5589. https://doi.org/10.1093/bioinformatics/btaa1081"
+ ]
+ }
+
if (!params.skip_sv_annotation) {
sv_annotation_text = [
"Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2",
"McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4",
- "Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142",
+ "Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142"
]
}
- if (!params.skip_mt_annotation) {
+ if (!params.skip_sv_calling) {
+ sv_calls_text = [
+ params.analysis_type.equals("wgs") ? "Abyzov, A., Urban, A. E., Snyder, M., & Gerstein, M. (2011). CNVnator: An approach to discover, genotype, and characterize typical and atypical CNVs from family and population genome sequencing. Genome Research, 21(6), 974–984. https://doi.org/10.1101/gr.114876.110" : "",
+ params.analysis_type.equals("wgs") ? "Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2" : "",
+ "Chen, X., Schulz-Trieglaff, O., Shaw, R., Barnes, B., Schlesinger, F., Källberg, M., Cox, A. J., Kruglyak, S., & Saunders, C. T. (2016). Manta: Rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics, 32(8), 1220–1222. https://doi.org/10.1093/bioinformatics/btv710",
+ params.analysis_type.equals("wgs") ? "Goudenège, D., Bris, C., Hoffmann, V., Desquiret-Dumas, V., Jardel, C., Rucheton, B., Bannwarth, S., Paquis-Flucklinger, V., Lebre, A. S., Colin, E., Amati-Bonneau, P., Bonneau, D., Reynier, P., Lenaers, G., & Procaccio, V. (2019). eKLIPse: A sensitive tool for the detection and quantification of mitochondrial DNA deletions from next-generation sequencing data. Genetics in Medicine, 21(6), 1407–1416. https://doi.org/10.1038/s41436-018-0350-8" : "" // WGS-only, same gate as the eKLIPse entry in toolCitationText()
+ ]
+ }
+
+ if (!params.skip_mt_annotation && (params.analysis_type.equals("wgs") || params.run_mt_for_wes)) {
mt_annotation_text = [
"Rentzsch, P., Schubach, M., Shendure, J., & Kircher, M. (2021). CADD-Splice—Improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Medicine, 13(1), 31. https://doi.org/10.1186/s13073-021-00835-9",
"Rentzsch, P., Witten, D., Cooper, G. M., Shendure, J., & Kircher, M. (2019). CADD: Predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research, 47(D1), D886–D894. https://doi.org/10.1093/nar/gky1016",
@@ -366,13 +387,19 @@ def toolBibliographyText() {
"McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4",
"Preste, R., Clima, R., & Attimonelli, M. (2019). Human mitochondrial variant annotation with HmtNote [Preprint]. Bioinformatics. https://doi.org/10.1101/600619",
"Weissensteiner, H., Pacher, D., Kloss-Brandstätter, A., Forer, L., Specht, G., Bandelt, H.-J., Kronenberg, F., Salas, A., & Schönherr, S. (2016). HaploGrep 2: Mitochondrial haplogroup classification in the era of high-throughput sequencing. Nucleic Acids Research, 44(W1), W58–W63. https://doi.org/10.1093/nar/gkw233",
- "Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142",
+ "Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142"
]
}
- if (!params.skip_me_annotation) {
+ if (!params.skip_me_annotation && params.analysis_type.equals("wgs")) {
me_annotation_text = [
"McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4",
+ "Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2"
+ ]
+ }
+ if (!params.skip_me_calling && params.analysis_type.equals("wgs")) {
+ me_calls_text = [
"Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2",
+ "Keane, T. M., Wong, K., & Adams, D. J. (2013). RetroSeq: Transposable element discovery from next-generation sequencing data. Bioinformatics, 29(3), 389–390. https://doi.org/10.1093/bioinformatics/bts697"
]
}
qc_bam_text = [
@@ -380,32 +407,30 @@ def toolBibliographyText() {
"Okonechnikov, K., Conesa, A., & García-Alcalde, F. (2016). Qualimap 2: Advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics, 32(2), 292–294. https://doi.org/10.1093/bioinformatics/btv566",
"Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2",
"Kent, W. J., Zweig, A. S., Barber, G., Hinrichs, A. S., & Karolchik, D. (2010). BigWig and BigBed: Enabling browsing of large distributed datasets. Bioinformatics, 26(17), 2204–2207. https://doi.org/10.1093/bioinformatics/btq351",
- "Pedersen, B. S., & Quinlan, A. R. (2018). Mosdepth: Quick coverage calculation for genomes and exomes. Bioinformatics, 34(5), 867–868. https://doi.org/10.1093/bioinformatics/btx699",
- ]
- me_calls_text = [
- "Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2",
- "Keane, T. M., Wong, K., & Adams, D. J. (2013). RetroSeq: Transposable element discovery from next-generation sequencing data. Bioinformatics, 29(3), 389–390. https://doi.org/10.1093/bioinformatics/bts697",
+ "Pedersen, B. S., & Quinlan, A. R. (2018). Mosdepth: Quick coverage calculation for genomes and exomes. Bioinformatics, 34(5), 867–868. https://doi.org/10.1093/bioinformatics/btx699"
]
preprocessing_text = [
- params.skip_fastqc ? "" : "Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/",
- params.skip_fastp ? "" : "Chen, S. (2023). Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp. iMeta, 2(2), e107. https://doi.org/10.1002/imt2.107",
+ "Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/",
+ params.skip_fastp ? "" : "Chen, S. (2023). Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp. iMeta, 2(2), e107. https://doi.org/10.1002/imt2.107"
]
+
other_citation_text = [
"Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2), giab008. https://doi.org/10.1093/gigascience/giab008",
"McKenna, A., Hanna, M., Banks, E., Sivachenko, A., Cibulskis, K., Kernytsky, A., Garimella, K., Altshuler, D., Gabriel, S., Daly, M., & DePristo, M. A. (2010). The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data. Genome Research, 20(9), 1297–1303. https://doi.org/10.1101/gr.107524.110",
"Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. https://doi.org/10.1093/bioinformatics/btw354",
- params.skip_peddy ? "" : "Pedersen, B. S., & Quinlan, A. R. (2017). Who’s Who? Detecting and Resolving Sample Anomalies in Human DNA Sequencing Studies with Peddy. The American Journal of Human Genetics, 100(3), 406–413. https://doi.org/10.1016/j.ajhg.2017.01.017",
+ params.skip_peddy ? "" : "Pedersen, B. S., & Quinlan, A. R. (2017). Who’s Who? Detecting and Resolving Sample Anomalies in Human DNA Sequencing Studies with Peddy. The American Journal of Human Genetics, 100(3), 406–413. https://doi.org/10.1016/j.ajhg.2017.01.017",
params.run_rtgvcfeval ? "Cleary, J. G., Braithwaite, R., Gaastra, K., Hilbush, B. S., Inglis, S., Irvine, S. A., Jackson, A., Littin, R., Rathod, M., Ware, D., Zook, J. M., Trigg, L., & Vega, F. M. D. L. (2015). Comparing Variant Call Files for Performance Benchmarking of Next-Generation Sequencing Variant Calling Pipelines (p. 023754). bioRxiv. https://doi.org/10.1101/023754" : "",
"Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., Marth, G., Abecasis, G., Durbin, R., & 1000 Genome Project Data Processing Subgroup. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics, 25(16), 2078–2079. https://doi.org/10.1093/bioinformatics/btp352",
- "Chen, X., Sanchis-Juan, A., French, C. E., Connell, A. J., Delon, I., Kingsbury, Z., Chawla, A., Halpern, A. L., Taft, R. J., Bentley, D. R., Butchbach, M. E. R., Raymond, F. L., & Eberle, M. A. (2020). Spinal muscular atrophy diagnosis and carrier screening from genome sequencing data. Genetics in Medicine, 22(5), 945–953. https://doi.org/10.1038/s41436-020-0754-0",
- "Li, H. (2011). Tabix: Fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics, 27(5), 718–719. https://doi.org/10.1093/bioinformatics/btq671",
+ (!params.skip_smncopynumbercaller && params.analysis_type.equals("wgs")) ? "Chen, X., Sanchis-Juan, A., French, C. E., Connell, A. J., Delon, I., Kingsbury, Z., Chawla, A., Halpern, A. L., Taft, R. J., Bentley, D. R., Butchbach, M. E. R., Raymond, F. L., & Eberle, M. A. (2020). Spinal muscular atrophy diagnosis and carrier screening from genome sequencing data. Genetics in Medicine, 22(5), 945–953. https://doi.org/10.1038/s41436-020-0754-0" : "",
+ "Li, H. (2011). Tabix: Fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics, 27(5), 718–719. https://doi.org/10.1093/bioinformatics/btq671"
]
def concat_text = align_text +
- variant_call_text +
- repeat_call_text +
+ repeats_text +
snv_annotation_text +
+ snv_calls_text +
sv_annotation_text +
+ sv_calls_text +
mt_annotation_text +
qc_bam_text +
me_calls_text +
diff --git a/subworkflows/local/variant_calling/call_snv_MT.nf b/subworkflows/local/variant_calling/call_snv_MT.nf
index b0a2fbae..17135d33 100644
--- a/subworkflows/local/variant_calling/call_snv_MT.nf
+++ b/subworkflows/local/variant_calling/call_snv_MT.nf
@@ -23,12 +23,10 @@ workflow CALL_SNV_MT {
GATK4_MUTECT2_MT (ch_bam_bai_int, ch_fasta, ch_fai, ch_dict, [], [], [],[])
- if (!params.skip_haplocheck) {
- HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf).set { ch_haplocheck }
- ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first())
- ch_haplocheck_txt = HAPLOCHECK_MT.out.txt
- ch_haplocheck_html = HAPLOCHECK_MT.out.html
- }
+ HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf).set { ch_haplocheck }
+ ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first())
+ ch_haplocheck_txt = HAPLOCHECK_MT.out.txt
+ ch_haplocheck_html = HAPLOCHECK_MT.out.html
// Filter Mutect2 calls
ch_mutect_vcf = GATK4_MUTECT2_MT.out.vcf.join(GATK4_MUTECT2_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf
index c3477d67..3104ee67 100644
--- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf
+++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf
@@ -16,6 +16,7 @@ workflow CALL_SNV_DEEPVARIANT {
ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_par_bed // channel: [optional] [ val(meta), path(bed) ]
ch_case_info // channel: [mandatory] [ val(case_info) ]
ch_foundin_header // channel: [mandatory] [ path(header) ]
ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ]
@@ -28,7 +29,7 @@ workflow CALL_SNV_DEEPVARIANT {
}
.set { ch_deepvar_in }
- DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]] )
+ DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]], ch_par_bed )
DEEPVARIANT.out.gvcf
.collect{it[1]}
.toList()
diff --git a/subworkflows/local/variant_calling/call_sv_MT.nf b/subworkflows/local/variant_calling/call_sv_MT.nf
index 7ce39b42..74d82d8e 100644
--- a/subworkflows/local/variant_calling/call_sv_MT.nf
+++ b/subworkflows/local/variant_calling/call_sv_MT.nf
@@ -16,13 +16,11 @@ workflow CALL_SV_MT {
ch_eklipse_genes = Channel.empty()
ch_eklipse_circos = Channel.empty()
- if (!params.skip_eklipse){
- EKLIPSE(ch_bam_bai,[])
- ch_eklipse_del = EKLIPSE.out.deletions
- ch_eklipse_genes = EKLIPSE.out.genes
- ch_eklipse_circos = EKLIPSE.out.circos
- ch_versions = ch_versions.mix(EKLIPSE.out.versions.first())
- }
+ EKLIPSE(ch_bam_bai,[])
+ ch_eklipse_del = EKLIPSE.out.deletions
+ ch_eklipse_genes = EKLIPSE.out.genes
+ ch_eklipse_circos = EKLIPSE.out.circos
+ ch_versions = ch_versions.mix(EKLIPSE.out.versions.first())
MT_DELETION(ch_bam_bai, ch_fasta)
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index ab09b6f2..80e51572 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -23,7 +23,6 @@ def mandatoryParams = [
"intervals_wgs",
"intervals_y",
"platform",
- "variant_catalog",
"variant_caller"
]
def missingParamsCount = 0
@@ -32,6 +31,18 @@ if (params.run_rtgvcfeval) {
mandatoryParams += ["rtg_truthvcfs"]
}
+if (!params.skip_repeat_calling) {
+ mandatoryParams += ["variant_catalog"]
+}
+
+if (!params.skip_repeat_annotation) {
+ mandatoryParams += ["variant_catalog"]
+}
+
+if (!params.skip_snv_calling) {
+ mandatoryParams += ["genome"]
+}
+
if (!params.skip_snv_annotation) {
mandatoryParams += ["genome", "vcfanno_resources", "vcfanno_toml", "vep_cache", "vep_cache_version",
"gnomad_af", "score_config_snv", "variant_consequences_snv"]
@@ -58,7 +69,7 @@ if (params.variant_caller.equals("sentieon")) {
}
if (!params.skip_germlinecnvcaller) {
- mandatoryParams += ["ploidy_model", "gcnvcaller_model"]
+ mandatoryParams += ["ploidy_model", "gcnvcaller_model", "readcount_intervals"]
}
if (!params.skip_vep_filter) {
@@ -83,6 +94,9 @@ if (!params.skip_gens) {
mandatoryParams += ["gens_gnomad_pos", "gens_interval_list", "gens_pon_female", "gens_pon_male"]
}
+if (!params.skip_smncopynumbercaller) {
+ mandatoryParams += ["genome"]
+}
for (param in mandatoryParams.unique()) {
if (params[param] == null) {
println("params." + param + " not set.")
@@ -131,6 +145,7 @@ include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkf
include { ANNOTATE_GENOME_SNVS } from '../subworkflows/local/annotate_genome_snvs'
include { ANNOTATE_MOBILE_ELEMENTS } from '../subworkflows/local/annotate_mobile_elements'
include { ANNOTATE_MT_SNVS } from '../subworkflows/local/annotate_mt_snvs'
+include { ANNOTATE_REPEAT_EXPANSIONS } from '../subworkflows/local/annotate_repeat_expansions'
include { ANNOTATE_STRUCTURAL_VARIANTS } from '../subworkflows/local/annotate_structural_variants'
include { CALL_MOBILE_ELEMENTS } from '../subworkflows/local/call_mobile_elements'
include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions'
@@ -170,34 +185,46 @@ workflow RAREDISEASE {
ch_samples = ch_samplesheet.map { meta, fastqs -> meta}
ch_case_info = ch_samples.toList().map { CustomFunctions.createCaseChannel(it) }
+ //
// Initialize file channels for PREPARE_REFERENCES subworkflow
- ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect()
- ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect()
- : Channel.empty()
- ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.value([[],[]])
- ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.value([[],[]])
- ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.empty()
- ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect()
- : Channel.value([[],[]])
- ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect()
- : Channel.value([[],[]])
+ //
+ ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect()
+ ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect()
+ : Channel.empty()
+ ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect()
+ : Channel.empty()
+ ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.value([[],[]])
+ ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.value([[],[]])
+ ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.empty()
+ ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect()
+ : Channel.value([[],[]])
+ ch_vcfanno_extra_unprocessed = params.vcfanno_extra_resources ? Channel.fromPath(params.vcfanno_extra_resources).map { it -> [[id:it.baseName], it] }.collect()
+ : Channel.empty()
+ ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect()
+ : Channel.value([[],[]])
+ //
// Prepare references and indices.
+ //
PREPARE_REFERENCES (
ch_genome_fasta,
ch_genome_fai,
+ ch_genome_dictionary,
ch_mt_fasta,
ch_gnomad_af_tab,
ch_dbsnp,
ch_target_bed_unprocessed,
+ ch_vcfanno_extra_unprocessed,
ch_vep_cache_unprocessed
)
.set { ch_references }
+ //
// Gather built indices or get them from the params
+ //
ch_bait_intervals = ch_references.bait_intervals
ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect()
ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect()
@@ -220,8 +247,7 @@ workflow RAREDISEASE {
: ch_references.genome_bwameme_index
ch_genome_chrsizes = ch_references.genome_chrom_sizes
ch_genome_fai = ch_references.genome_fai
- ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect()
- : ch_references.genome_dict
+ ch_genome_dictionary = ch_references.genome_dict
ch_gens_gnomad_pos = params.gens_gnomad_pos ? Channel.fromPath(params.gens_gnomad_pos).collect()
: Channel.empty()
ch_gens_interval_list = params.gens_interval_list ? Channel.fromPath(params.gens_interval_list).collect()
@@ -248,15 +274,16 @@ workflow RAREDISEASE {
ch_mtshift_backchain = ch_references.mtshift_backchain
ch_mtshift_bwaindex = ch_references.mtshift_bwa_index
ch_mtshift_bwamem2index = ch_references.mtshift_bwamem2_index
- ch_mtshift_bwamemeindex = ch_references.mtshift_bwameme_index
ch_mtshift_dictionary = ch_references.mtshift_dict
ch_mtshift_fai = ch_references.mtshift_fai
ch_mtshift_fasta = ch_references.mtshift_fasta
ch_mtshift_intervals = ch_references.mtshift_intervals
+ ch_par_bed = params.par_bed ? Channel.fromPath(params.par_bed).map{ it -> [[id:'par_bed'], it] }.collect()
+ : Channel.value([[],[]])
ch_ploidy_model = params.ploidy_model ? Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect()
: Channel.empty()
ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect()
- : ( ch_references.readcount_intervals ?: Channel.empty() )
+ : Channel.empty()
ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect()
: Channel.value([])
ch_rtg_truthvcfs = params.rtg_truthvcfs ? Channel.fromPath(params.rtg_truthvcfs).collect()
@@ -283,6 +310,7 @@ workflow RAREDISEASE {
: Channel.value([])
ch_variant_consequences_sv = params.variant_consequences_sv ? Channel.fromPath(params.variant_consequences_sv).collect()
: Channel.value([])
+ ch_vcfanno_extra = ch_references.vcfanno_extra
ch_vcfanno_resources = params.vcfanno_resources ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect()
: Channel.value([])
ch_vcf2cytosure_blacklist = params.vcf2cytosure_blacklist ? Channel.fromPath(params.vcf2cytosure_blacklist).collect()
@@ -301,19 +329,32 @@ workflow RAREDISEASE {
: Channel.empty()
ch_versions = ch_versions.mix(ch_references.versions)
+ //
// SV caller priority
+ //
if (params.skip_germlinecnvcaller) {
- ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"])
+ if (params.analysis_type.equals("wgs")) {
+ ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"])
+ } else {
+ ch_svcaller_priority = Channel.value([])
+ }
} else {
- ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"])
+ if (params.analysis_type.equals("wgs")) {
+ ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"])
+ } else {
+ ch_svcaller_priority = Channel.value(["manta", "gcnvcaller"])
+ }
}
-
+ //
// Generate pedigree file
+ //
ch_pedfile = CREATE_PEDIGREE_FILE(ch_samples.toList()).ped
ch_versions = ch_versions.mix(CREATE_PEDIGREE_FILE.out.versions)
+ //
// Read and store paths in the vep_plugin_files file
+ //
if (params.vep_plugin_files) {
ch_vep_extra_files_unsplit.splitCsv ( header:true )
.map { row ->
@@ -328,7 +369,9 @@ workflow RAREDISEASE {
.set {ch_vep_extra_files}
}
- // Read and store hgnc ids in a channel
+ //
+ // Dump all HGNC ids in a file
+ //
ch_vep_filters_scout_fmt
.mix (ch_vep_filters_std_fmt)
.set {ch_vep_filters}
@@ -337,13 +380,15 @@ workflow RAREDISEASE {
.txt
.set {ch_hgnc_ids}
+ //
// Input QC
- if (!params.skip_fastqc) {
- FASTQC (ch_samplesheet)
- ch_versions = ch_versions.mix(FASTQC.out.versions.first())
- }
+ //
+ FASTQC (ch_samplesheet)
+ ch_versions = ch_versions.mix(FASTQC.out.versions.first())
- // CREATE CHROMOSOME BED AND INTERVALS
+ //
+ // Create chromosome bed and intervals for splitting and gathering operations
+ //
SCATTER_GENOME (
ch_genome_dictionary,
ch_genome_fai,
@@ -353,9 +398,12 @@ workflow RAREDISEASE {
ch_scatter_split_intervals = ch_scatter.split_intervals ?: Channel.empty()
- //
- // ALIGNING READS, FETCH STATS, AND MERGE.
- //
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ALIGN & FETCH STATS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
ALIGN (
ch_samplesheet,
ch_genome_fasta,
@@ -366,11 +414,12 @@ workflow RAREDISEASE {
ch_genome_dictionary,
ch_mtshift_bwaindex,
ch_mtshift_bwamem2index,
- ch_mtshift_bwamemeindex,
ch_mtshift_fasta,
ch_mtshift_dictionary,
ch_mtshift_fai,
- params.platform
+ params.mbuffer_mem,
+ params.platform,
+ params.samtools_sort_threads
)
.set { ch_mapped }
ch_versions = ch_versions.mix(ALIGN.out.versions)
@@ -402,10 +451,13 @@ workflow RAREDISEASE {
)
ch_versions = ch_versions.mix(QC_BAM.out.versions)
- //
- // EXPANSIONHUNTER AND STRANGER
- //
- if (params.analysis_type.equals("wgs")) {
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ CALL AND ANNOTATE REPEAT EXPANSIONS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+ if (!params.skip_repeat_calling && params.analysis_type.equals("wgs") ) {
CALL_REPEAT_EXPANSIONS (
ch_mapped.genome_bam_bai,
ch_variant_catalog,
@@ -414,223 +466,312 @@ workflow RAREDISEASE {
ch_genome_fai
)
ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions)
+
+ if (!params.skip_repeat_annotation) {
+ ANNOTATE_REPEAT_EXPANSIONS (
+ ch_variant_catalog,
+ CALL_REPEAT_EXPANSIONS.out.vcf
+ )
+ ch_versions = ch_versions.mix(ANNOTATE_REPEAT_EXPANSIONS.out.versions) // annotation step's versions; the caller's are already mixed in above
+ }
}
- //
- // SNV CALLING
- //
- CALL_SNV (
- ch_mapped.genome_bam_bai,
- ch_mapped.mt_bam_bai,
- ch_mapped.mtshift_bam_bai,
- ch_genome_chrsizes,
- ch_genome_fasta,
- ch_genome_fai,
- ch_genome_dictionary,
- ch_mt_intervals,
- ch_mtshift_fasta,
- ch_mtshift_fai,
- ch_mtshift_dictionary,
- ch_mtshift_intervals,
- ch_mtshift_backchain,
- ch_dbsnp,
- ch_dbsnp_tbi,
- ch_call_interval,
- ch_ml_model,
- ch_case_info,
- ch_foundin_header,
- Channel.value(params.sentieon_dnascope_pcr_indel_model)
- )
- ch_versions = ch_versions.mix(CALL_SNV.out.versions)
- //
- // VARIANT EVALUATION
- //
- if (params.run_rtgvcfeval) {
- VARIANT_EVALUATION (
- CALL_SNV.out.genome_vcf_tabix,
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ CALL AND ANNOTATE NUCLEAR AND MITOCHONDRIAL SNVs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+ if (!params.skip_snv_calling) {
+ CALL_SNV (
+ ch_mapped.genome_bam_bai,
+ ch_mapped.mt_bam_bai,
+ ch_mapped.mtshift_bam_bai,
+ ch_genome_chrsizes,
+ ch_genome_fasta,
ch_genome_fai,
- ch_rtg_truthvcfs,
- ch_sdf
+ ch_genome_dictionary,
+ ch_mt_intervals,
+ ch_mtshift_fasta,
+ ch_mtshift_fai,
+ ch_mtshift_dictionary,
+ ch_mtshift_intervals,
+ ch_mtshift_backchain,
+ ch_dbsnp,
+ ch_dbsnp_tbi,
+ ch_call_interval,
+ ch_ml_model,
+ ch_par_bed,
+ ch_case_info,
+ ch_foundin_header,
+ Channel.value(params.sentieon_dnascope_pcr_indel_model)
)
- ch_versions = ch_versions.mix(VARIANT_EVALUATION.out.versions)
- }
+ ch_versions = ch_versions.mix(CALL_SNV.out.versions)
+
+ //
+ // ANNOTATE GENOME SNVs
+ //
+ if (!params.skip_snv_annotation) {
+
+ ANNOTATE_GENOME_SNVS (
+ CALL_SNV.out.genome_vcf_tabix,
+ params.analysis_type,
+ ch_cadd_header,
+ ch_cadd_resources,
+ ch_vcfanno_extra,
+ ch_vcfanno_resources,
+ ch_vcfanno_lua,
+ ch_vcfanno_toml,
+ params.genome,
+ params.vep_cache_version,
+ ch_vep_cache,
+ ch_genome_fasta,
+ ch_gnomad_af,
+ ch_samples,
+ ch_scatter_split_intervals,
+ ch_vep_extra_files,
+ ch_genome_chrsizes
+ ).set { ch_snv_annotate }
+ ch_versions = ch_versions.mix(ch_snv_annotate.versions)
+
+ GENERATE_CLINICAL_SET_SNV(
+ ch_snv_annotate.vcf_ann,
+ ch_hgnc_ids
+ )
+ ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions)
- //
- // SV CALLING
- //
- CALL_STRUCTURAL_VARIANTS (
- ch_mapped.genome_marked_bam,
- ch_mapped.genome_marked_bai,
- ch_mapped.genome_bam_bai,
- ch_mapped.mt_bam_bai,
- ch_mapped.mtshift_bam_bai,
- ch_genome_bwaindex,
- ch_genome_fasta,
- ch_genome_fai,
- ch_mtshift_fasta,
- ch_case_info,
- ch_target_bed,
- ch_genome_dictionary,
- ch_svcaller_priority,
- ch_readcount_intervals,
- ch_ploidy_model,
- ch_gcnvcaller_model
- )
- ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions)
+ ANN_CSQ_PLI_SNV (
+ GENERATE_CLINICAL_SET_SNV.out.vcf,
+ ch_variant_consequences_snv
+ )
+ ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions)
+
+ ANN_CSQ_PLI_SNV.out.vcf_ann
+ .filter { it ->
+ if (it[0].probands.size()==0) {
+ log.warn("Skipping nuclear SNV ranking since no affected samples are detected in the case")
+ }
+ it[0].probands.size()>0
+ }
+ .set {ch_ranksnv_nuclear_in}
- //
- // ANNOTATE STRUCTURAL VARIANTS
- //
- if (!params.skip_sv_annotation) {
- ANNOTATE_STRUCTURAL_VARIANTS (
- CALL_STRUCTURAL_VARIANTS.out.vcf,
- ch_sv_dbs,
- ch_sv_bedpedbs,
- params.genome,
- params.vep_cache_version,
- ch_vep_cache,
- ch_genome_fasta,
- ch_genome_dictionary,
- ch_vep_extra_files
- ).set { ch_sv_annotate }
- ch_versions = ch_versions.mix(ch_sv_annotate.versions)
+ RANK_VARIANTS_SNV (
+ ch_ranksnv_nuclear_in,
+ ch_pedfile,
+ ch_reduced_penetrance,
+ ch_score_config_snv
+ )
+ ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions)
+ }
- GENERATE_CLINICAL_SET_SV(
- ch_sv_annotate.vcf_ann,
- ch_hgnc_ids
- )
- ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions)
+ //
+ // ANNOTATE MT SNVs
+ //
+ if (!params.skip_mt_annotation && (params.run_mt_for_wes || params.analysis_type.equals("wgs"))) {
- ANN_CSQ_PLI_SV (
- GENERATE_CLINICAL_SET_SV.out.vcf,
- ch_variant_consequences_sv
- )
- ch_versions = ch_versions.mix(ANN_CSQ_PLI_SV.out.versions)
+ ANNOTATE_MT_SNVS (
+ CALL_SNV.out.mt_vcf,
+ CALL_SNV.out.mt_tabix,
+ ch_cadd_header,
+ ch_cadd_resources,
+ ch_genome_fasta,
+ ch_vcfanno_extra,
+ ch_vcfanno_lua,
+ ch_vcfanno_resources,
+ ch_vcfanno_toml,
+ params.genome,
+ params.vep_cache_version,
+ ch_vep_cache,
+ ch_vep_extra_files
+ ).set { ch_mt_annotate }
+ ch_versions = ch_versions.mix(ch_mt_annotate.versions)
- RANK_VARIANTS_SV (
- ANN_CSQ_PLI_SV.out.vcf_ann,
- ch_pedfile,
- ch_reduced_penetrance,
- ch_score_config_sv
- )
- ch_versions = ch_versions.mix(RANK_VARIANTS_SV.out.versions)
+ GENERATE_CLINICAL_SET_MT(
+ ch_mt_annotate.vcf_ann,
+ ch_hgnc_ids
+ )
+ ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions)
+
+ ANN_CSQ_PLI_MT(
+ GENERATE_CLINICAL_SET_MT.out.vcf,
+ ch_variant_consequences_snv
+ )
+ ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions)
+
+ ANN_CSQ_PLI_MT.out.vcf_ann
+ .filter { it ->
+ if (it[0].probands.size()==0) {
+ log.warn("Skipping mitochondrial SNV ranking since no affected samples are detected in the case")
+ }
+ it[0].probands.size()>0
+ }
+ .set {ch_ranksnv_mt_in}
+ RANK_VARIANTS_MT (
+ ch_ranksnv_mt_in,
+ ch_pedfile,
+ ch_reduced_penetrance,
+ ch_score_config_mt
+ )
+ ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions)
+ }
}
- //
- // ANNOTATE GENOME SNVs
- //
- if (!params.skip_snv_annotation) {
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ CALL AND ANNOTATE NUCLEAR SVs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
- ANNOTATE_GENOME_SNVS (
- CALL_SNV.out.genome_vcf_tabix,
- params.analysis_type,
- ch_cadd_header,
- ch_cadd_resources,
- ch_vcfanno_resources,
- ch_vcfanno_lua,
- ch_vcfanno_toml,
- params.genome,
- params.vep_cache_version,
- ch_vep_cache,
+ if (!params.skip_sv_calling) {
+ CALL_STRUCTURAL_VARIANTS (
+ ch_mapped.genome_marked_bam,
+ ch_mapped.genome_marked_bai,
+ ch_mapped.genome_bam_bai,
+ ch_mapped.mt_bam_bai,
+ ch_mapped.mtshift_bam_bai,
+ ch_genome_bwaindex,
ch_genome_fasta,
- ch_gnomad_af,
- ch_samples,
- ch_scatter_split_intervals,
- ch_vep_extra_files,
- ch_genome_chrsizes
- ).set { ch_snv_annotate }
- ch_versions = ch_versions.mix(ch_snv_annotate.versions)
-
- GENERATE_CLINICAL_SET_SNV(
- ch_snv_annotate.vcf_ann,
- ch_hgnc_ids
+ ch_genome_fai,
+ ch_mtshift_fasta,
+ ch_case_info,
+ ch_target_bed,
+ ch_genome_dictionary,
+ ch_svcaller_priority,
+ ch_readcount_intervals,
+ ch_ploidy_model,
+ ch_gcnvcaller_model
)
- ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions)
+ ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions)
+
+ //
+ // ANNOTATE STRUCTURAL VARIANTS
+ //
+ if (!params.skip_sv_annotation) {
+ ANNOTATE_STRUCTURAL_VARIANTS (
+ CALL_STRUCTURAL_VARIANTS.out.vcf,
+ ch_sv_dbs,
+ ch_sv_bedpedbs,
+ params.genome,
+ params.vep_cache_version,
+ ch_vep_cache,
+ ch_genome_fasta,
+ ch_genome_dictionary,
+ ch_vep_extra_files
+ ).set { ch_sv_annotate }
+ ch_versions = ch_versions.mix(ch_sv_annotate.versions)
- ANN_CSQ_PLI_SNV (
- GENERATE_CLINICAL_SET_SNV.out.vcf,
- ch_variant_consequences_snv
- )
- ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions)
+ GENERATE_CLINICAL_SET_SV(
+ ch_sv_annotate.vcf_ann,
+ ch_hgnc_ids
+ )
+ ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions)
- RANK_VARIANTS_SNV (
- ANN_CSQ_PLI_SNV.out.vcf_ann,
- ch_pedfile,
- ch_reduced_penetrance,
- ch_score_config_snv
- )
- ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions)
+ ANN_CSQ_PLI_SV (
+ GENERATE_CLINICAL_SET_SV.out.vcf,
+ ch_variant_consequences_sv
+ )
+ ch_versions = ch_versions.mix(ANN_CSQ_PLI_SV.out.versions)
+
+ ANN_CSQ_PLI_SV.out.vcf_ann
+ .filter { it ->
+ if (it[0].probands.size()==0) {
+ log.warn("Skipping SV ranking since no affected samples are detected in the case")
+ }
+ it[0].probands.size()>0
+ }
+ .set {ch_ranksnv_sv_in}
+ RANK_VARIANTS_SV (
+ ch_ranksnv_sv_in,
+ ch_pedfile,
+ ch_reduced_penetrance,
+ ch_score_config_sv
+ )
+ ch_versions = ch_versions.mix(RANK_VARIANTS_SV.out.versions)
+ }
}
- //
- // ANNOTATE MT SNVs
- //
- if (!params.skip_mt_annotation && (params.run_mt_for_wes || params.analysis_type.equals("wgs"))) {
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ CALL AND ANNOTATE MOBILE ELEMENTS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
- ANNOTATE_MT_SNVS (
- CALL_SNV.out.mt_vcf,
- CALL_SNV.out.mt_tabix,
- ch_cadd_header,
- ch_cadd_resources,
+ if (!params.skip_me_calling && params.analysis_type.equals("wgs")) {
+ CALL_MOBILE_ELEMENTS(
+ ch_mapped.genome_bam_bai,
ch_genome_fasta,
- ch_vcfanno_resources,
- ch_vcfanno_toml,
- params.genome,
- params.vep_cache_version,
- ch_vep_cache,
- ch_vep_extra_files
- ).set { ch_mt_annotate }
- ch_versions = ch_versions.mix(ch_mt_annotate.versions)
-
- GENERATE_CLINICAL_SET_MT(
- ch_mt_annotate.vcf_ann,
- ch_hgnc_ids
+ ch_genome_fai,
+ ch_me_references,
+ ch_case_info,
+ params.genome
)
- ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions)
+ ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions)
- ANN_CSQ_PLI_MT(
- GENERATE_CLINICAL_SET_MT.out.vcf,
- ch_variant_consequences_snv
- )
- ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions)
+ if (!params.skip_me_annotation) {
+ ANNOTATE_MOBILE_ELEMENTS(
+ CALL_MOBILE_ELEMENTS.out.vcf,
+ ch_me_svdb_resources,
+ ch_genome_fasta,
+ ch_genome_dictionary,
+ ch_vep_cache,
+ params.genome,
+ params.vep_cache_version,
+ ch_vep_extra_files
+ )
+ ch_versions = ch_versions.mix(ANNOTATE_MOBILE_ELEMENTS.out.versions)
- RANK_VARIANTS_MT (
- ANN_CSQ_PLI_MT.out.vcf_ann,
- ch_pedfile,
- ch_reduced_penetrance,
- ch_score_config_mt
- )
- ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions)
+ GENERATE_CLINICAL_SET_ME(
+ ANNOTATE_MOBILE_ELEMENTS.out.vcf,
+ ch_hgnc_ids
+ )
+ ch_versions = ch_versions.mix( GENERATE_CLINICAL_SET_ME.out.versions )
+
+ ANN_CSQ_PLI_ME(
+ GENERATE_CLINICAL_SET_ME.out.vcf,
+ ch_variant_consequences_sv
+ )
+ ch_versions = ch_versions.mix( ANN_CSQ_PLI_ME.out.versions )
+ }
}
- // STEP 1.7: SMNCOPYNUMBERCALLER
- RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output
- .collect{it}
- .toList()
- .set { ch_bam_list }
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SMNCOPYNUMBERCALLER
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
- RENAME_BAI_FOR_SMNCALLER(ch_mapped.genome_marked_bai, "bam.bai").output
- .collect{it}
- .toList()
- .set { ch_bai_list }
+ if ( params.analysis_type.equals("wgs") && !params.skip_smncopynumbercaller ) {
+ RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output
+ .collect{it}
+ .toList()
+ .set { ch_bam_list }
- ch_case_info
- .combine(ch_bam_list)
- .combine(ch_bai_list)
- .set { ch_bams_bais }
+ RENAME_BAI_FOR_SMNCALLER(ch_mapped.genome_marked_bai, "bam.bai").output
+ .collect{it}
+ .toList()
+ .set { ch_bai_list }
- SMNCOPYNUMBERCALLER (
- ch_bams_bais
- )
- ch_versions = ch_versions.mix(RENAME_BAM_FOR_SMNCALLER.out.versions)
- ch_versions = ch_versions.mix(RENAME_BAI_FOR_SMNCALLER.out.versions)
- ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions)
+ ch_case_info
+ .combine(ch_bam_list)
+ .combine(ch_bai_list)
+ .set { ch_bams_bais }
- // ped correspondence, sex check, ancestry check
+ SMNCOPYNUMBERCALLER (
+ ch_bams_bais
+ )
+ ch_versions = ch_versions.mix(RENAME_BAM_FOR_SMNCALLER.out.versions)
+ ch_versions = ch_versions.mix(RENAME_BAI_FOR_SMNCALLER.out.versions)
+ ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions)
+ }
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ PEDDY
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
if (!params.skip_peddy) {
PEDDY (
CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true),
@@ -639,8 +780,12 @@ workflow RAREDISEASE {
ch_versions = ch_versions.mix(PEDDY.out.versions.first())
}
- // Generate CGH files from sequencing data, turned off by default
- if ( !params.skip_vcf2cytosure && params.analysis_type != "wes" ) {
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Generate CGH files from sequencing data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+ if ( !params.skip_vcf2cytosure && params.analysis_type.equals("wgs") && !params.skip_sv_calling && !params.skip_sv_annotation) {
GENERATE_CYTOSURE_FILES (
ch_sv_annotate.vcf_ann,
ch_sv_annotate.tbi,
@@ -651,8 +796,12 @@ workflow RAREDISEASE {
ch_versions = ch_versions.mix(GENERATE_CYTOSURE_FILES.out.versions)
}
- // GENS
- if ( !params.skip_gens && params.analysis_type != "wes" ) {
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ GENS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+ if ( !params.skip_gens && params.analysis_type.equals("wgs") && !params.skip_snv_calling ) { // GENS reads CALL_SNV.out, which only exists when SNV calling ran
GENS (
ch_mapped.genome_bam_bai,
CALL_SNV.out.genome_gvcf,
@@ -668,47 +817,28 @@ workflow RAREDISEASE {
ch_versions = ch_versions.mix(GENS.out.versions)
}
- if (!params.skip_me_calling) {
- CALL_MOBILE_ELEMENTS(
- ch_mapped.genome_bam_bai,
- ch_genome_fasta,
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ VARIANT EVALUATION WITH RTGTOOLS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+ if (params.run_rtgvcfeval && !params.skip_snv_calling) { // evaluation reads CALL_SNV.out, which only exists when SNV calling ran
+ VARIANT_EVALUATION (
+ CALL_SNV.out.genome_vcf_tabix,
ch_genome_fai,
- ch_me_references,
- ch_case_info,
- params.genome
+ ch_rtg_truthvcfs,
+ ch_sdf
)
- ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions)
-
- if (!params.skip_me_annotation) {
- ANNOTATE_MOBILE_ELEMENTS(
- CALL_MOBILE_ELEMENTS.out.vcf,
- ch_me_svdb_resources,
- ch_genome_fasta,
- ch_genome_dictionary,
- ch_vep_cache,
- params.genome,
- params.vep_cache_version,
- ch_vep_extra_files
- )
- ch_versions = ch_versions.mix(ANNOTATE_MOBILE_ELEMENTS.out.versions)
-
- GENERATE_CLINICAL_SET_ME(
- ANNOTATE_MOBILE_ELEMENTS.out.vcf,
- ch_hgnc_ids
- )
- ch_versions = ch_versions.mix( GENERATE_CLINICAL_SET_ME.out.versions )
+ ch_versions = ch_versions.mix(VARIANT_EVALUATION.out.versions)
+ }
- ANN_CSQ_PLI_ME(
- GENERATE_CLINICAL_SET_ME.out.vcf,
- ch_variant_consequences_sv
- )
- ch_versions = ch_versions.mix( ANN_CSQ_PLI_ME.out.versions )
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ COLLECT SOFTWARE VERSIONS & MultiQC
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
- }
- }
- //
- // Collate and save software versions
- //
softwareVersionsToYAML(ch_versions)
.collectFile(
storeDir: "${params.outdir}/pipeline_info",
@@ -749,9 +879,7 @@ workflow RAREDISEASE {
)
)
- if (!params.skip_fastqc) {
- ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
- }
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.multiple_metrics.map{it[1]}.collect().ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.hs_metrics.map{it[1]}.collect().ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.qualimap_results.map{it[1]}.collect().ifEmpty([]))