diff --git a/.travis.yml b/.travis.yml index b05868916b..474eb3721c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,19 +12,23 @@ env: - NXF_VER=0.32.0 matrix: - TEST=SOMATIC + - TEST=GERMLINE - TEST=ANNOTATEVEP - TEST=ANNOTATESNPEFF - - TEST=GERMLINE + +before_install: +# PRs to master are only ok if coming from dev branch + - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' +# Download containers + - "travis_retry ./scripts/containers.sh --profile docker --test $TEST" install: # Install Nextflow - curl -fsSL get.nextflow.io | bash - chmod +x nextflow - sudo mv nextflow /usr/local/bin/ - # Donwload big containers for ANNOTATEVEP and ANNOTATESNPEF tests) - - "travis_retry ./scripts/containers.sh --profile docker --test $TEST" -# Build references when needed before_script: "./scripts/test.sh --profile docker --test $TEST --build" # Actual tests diff --git a/CHANGELOG.md b/CHANGELOG.md index 4592bbcabb..6ca27045ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,41 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
+## [2.2.2] - 2018-12-19 + +### `Added` + +- [#671](https://github.com/SciLifeLab/Sarek/pull/671) - New `publishDirMode` param and docs +- [#673](https://github.com/SciLifeLab/Sarek/pull/673), [#675](https://github.com/SciLifeLab/Sarek/pull/675), [#676](https://github.com/SciLifeLab/Sarek/pull/676) - Profiles for BinAC and CFC clusters in Tübingen +- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add container for `CreateIntervalBeds` +- [#692](https://github.com/SciLifeLab/Sarek/pull/692), [#697](https://github.com/SciLifeLab/Sarek/pull/697) - Add AWS iGenomes possibilities (within `conf/igenomes.conf`) +- [#694](https://github.com/SciLifeLab/Sarek/pull/694) - Add monochrome and grey logos for light or dark background +- [#698](https://github.com/SciLifeLab/Sarek/pull/698) - Add btb profile for munin server +- [#702](https://github.com/SciLifeLab/Sarek/pull/702) - Add font-ttf-dejavu-sans-mono `2.37` and fontconfig `2.12.6` to container + +### `Changed` + +- [#678](https://github.com/SciLifeLab/Sarek/pull/678) - Changing VEP to v92 and adjusting CPUs for VEP +- [#663](https://github.com/SciLifeLab/Sarek/pull/663) - Update `do_release.sh` script +- [#671](https://github.com/SciLifeLab/Sarek/pull/671) - publishDir modes are now params +- [#677](https://github.com/SciLifeLab/Sarek/pull/677), [#698](https://github.com/SciLifeLab/Sarek/pull/698), [#703](https://github.com/SciLifeLab/Sarek/pull/703) - Update docs +- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Update old awsbatch configuration +- [#682](https://github.com/SciLifeLab/Sarek/pull/682) - Specifications for memory and cpus for awsbatch +- [#693](https://github.com/SciLifeLab/Sarek/pull/693) - Qualimap bamQC is now ran after mapping and after recalibration for better QC +- [#700](https://github.com/SciLifeLab/Sarek/pull/700) - Update GATK to `4.0.9.0` +- [#702](https://github.com/SciLifeLab/Sarek/pull/702) - Update FastQC to `0.11.8` +- [#705](https://github.com/SciLifeLab/Sarek/pull/705) - 
Change `--TMP_DIR` by `--tmp-dir` for GATK `4.0.9.0` BaseRecalibrator +- [#706](https://github.com/SciLifeLab/Sarek/pull/706) - Update TravisCI testing + +### `Fixed` + +- [#665](https://github.com/SciLifeLab/Sarek/pull/665) - Input bam file now has always the same name (whether it is from a single fastq pair or multiple) in the MarkDuplicates process, so metrics too +- [#672](https://github.com/SciLifeLab/Sarek/pull/672) - process `PullSingularityContainers` from `buildContainers.nf` now expect a file with the correct `.simg` extension for singularity images, and no longer the `.img` one. +- [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add publishDirMode for `germlineVC.nf` +- [#700](https://github.com/SciLifeLab/Sarek/pull/700) - Fix [#699](https://github.com/SciLifeLab/Sarek/issues/699) missing DP in the FORMAT column VCFs for MuTect2 +- [#702](https://github.com/SciLifeLab/Sarek/pull/702) - Fix [#701](https://github.com/SciLifeLab/Sarek/issues/701) +- [#705](https://github.com/SciLifeLab/Sarek/pull/705) - Fix [#704](https://github.com/SciLifeLab/Sarek/issues/704) + ## [2.2.1] - 2018-10-04 ### `Changed` diff --git a/Dockerfile b/Dockerfile index 8668f88c02..9b6674f9a5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,4 +7,4 @@ LABEL \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/sarek-2.2.1/bin:$PATH +ENV PATH /opt/conda/envs/sarek-2.2.2/bin:$PATH diff --git a/Sarek-data b/Sarek-data index c2da0d2a8a..03b5a18b2b 160000 --- a/Sarek-data +++ b/Sarek-data @@ -1 +1 @@ -Subproject commit c2da0d2a8a1c1a8e9b9b0930b84e34073ea43d03 +Subproject commit 03b5a18b2bdba3dac6307e27a5b5c7e5fec3bd54 diff --git a/Singularity b/Singularity index 4f0342b45f..9d6d91ade7 100644 --- a/Singularity +++ b/Singularity @@ -4,10 +4,10 @@ Bootstrap:docker %labels MAINTAINER Maxime Garcia DESCRIPTION Singularity image containing all requirements for the Sarek pipeline - VERSION 2.1.0 + VERSION 2.2.2 %environment - 
PATH=/opt/conda/envs/sarek-2.2.1/bin:$PATH + PATH=/opt/conda/envs/sarek-2.2.2/bin:$PATH export PATH %files diff --git a/annotate.nf b/annotate.nf index d8a1bd9d2b..d3b4cbccac 100644 --- a/annotate.nf +++ b/annotate.nf @@ -42,6 +42,13 @@ if (params.help) exit 0, helpMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " +// Check for awsbatch profile configuration +// make sure queue is defined +if (workflow.profile == 'awsbatch') { + if(!params.awsqueue) exit 1, "Provide the job queue for aws batch!" +} + + tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] annotateTools = params.annotateTools ? params.annotateTools.split(',').collect{it.trim().toLowerCase()} : [] annotateVCF = params.annotateVCF ? params.annotateVCF.split(',').collect{it.trim()} : [] @@ -103,7 +110,7 @@ vcfForVep = vcfForVep.map { process RunBcftoolsStats { tag {vcf} - publishDir directoryMap.bcftoolsStats, mode: 'link' + publishDir directoryMap.bcftoolsStats, mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForBCFtools @@ -124,7 +131,7 @@ if (params.verbose) bcfReport = bcfReport.view { process RunVcftools { tag {vcf} - publishDir directoryMap.vcftools, mode: 'link' + publishDir directoryMap.vcftools, mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForVCFtools @@ -145,10 +152,10 @@ if (params.verbose) vcfReport = vcfReport.view { process RunSnpeff { tag {"${variantCaller} - ${vcf}"} - publishDir params.outDir, mode: 'link', saveAs: { - if (it == "${vcf.simpleName}_snpEff.csv") "${directoryMap.snpeffReports}/${it}" + publishDir params.outDir, mode: params.publishDirMode, saveAs: { + if (it == "${vcf.simpleName}_snpEff.csv") "${directoryMap.snpeffReports.minus(params.outDir+'/')}/${it}" else if (it == "${vcf.simpleName}_snpEff.ann.vcf") null - else "${directoryMap.snpeff}/${it}" + else 
"${directoryMap.snpeff.minus(params.outDir+'/')}/${it}" } input: @@ -198,8 +205,8 @@ if('merge' in tools) { process RunVEP { tag {"${variantCaller} - ${vcf}"} - publishDir params.outDir, mode: 'link', saveAs: { - if (it == "${vcf.simpleName}_VEP.summary.html") "${directoryMap.vep}/${it}" + publishDir params.outDir, mode: params.publishDirMode, saveAs: { + if (it == "${vcf.simpleName}_VEP.summary.html") "${directoryMap.vep.minus(params.outDir+'/')}/${it}" else null } @@ -215,13 +222,14 @@ process RunVEP { script: finalannotator = annotator == "snpeff" ? 'merge' : 'vep' genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome + cache_version = params.genome == 'GRCh38' || params.genome == 'iGRCh38' ? 92 : 91 """ /opt/vep/src/ensembl-vep/vep --dir /opt/vep/.vep/ \ -i ${vcf} \ -o ${vcf.simpleName}_VEP.ann.vcf \ --assembly ${genome} \ --cache \ - --cache_version 91 \ + --cache_version ${cache_version} \ --database \ --everything \ --filter_common \ @@ -245,7 +253,7 @@ vcfToCompress = snpeffVCF.mix(vepVCF) process CompressVCF { tag {"${annotator} - ${vcf}"} - publishDir "${directoryMap."$finalannotator"}", mode: 'link' + publishDir "${directoryMap."$finalannotator"}", mode: params.publishDirMode input: set annotator, variantCaller, file(vcf) from vcfToCompress @@ -268,14 +276,14 @@ if (params.verbose) vcfCompressedoutput = vcfCompressedoutput.view { } process GetVersionSnpeff { - publishDir directoryMap.version, mode: 'link' + publishDir directoryMap.version, mode: params.publishDirMode output: file("v_*.txt") when: 'snpeff' in tools || 'merge' in tools script: QC.getVersionSnpEFF() } process GetVersionVEP { - publishDir directoryMap.version, mode: 'link' + publishDir directoryMap.version, mode: params.publishDirMode output: file("v_*.txt") when: 'vep' in tools || 'merge' in tools script: QC.getVersionVEP() diff --git a/buildContainers.nf b/buildContainers.nf index c5706dc981..33e4e371b3 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -38,6 +38,12 
@@ if (params.help) exit 0, helpMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " +// Check for awsbatch profile configuration +// make sure queue is defined +if (workflow.profile == 'awsbatch') { + if(!params.awsqueue) exit 1, "Provide the job queue for aws batch!" +} + // Define containers to handle (build/push or pull) containersList = defineContainersList() containers = params.containers.split(',').collect {it.trim()} @@ -86,13 +92,13 @@ if (params.verbose) containersBuilt = containersBuilt.view { process PullSingularityContainers { tag {"${params.repository}/${container}:${params.tag}"} - publishDir "${params.containerPath}", mode: 'move' + publishDir "${params.containerPath}", mode: params.publishDirMode input: val container from singularityContainers output: - file("${container}-${params.tag}.img") into imagePulled + file("${container}-${params.tag}.simg") into imagePulled when: params.singularity diff --git a/buildReferences.nf b/buildReferences.nf index 52a34af469..5d3f4d485e 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -40,6 +40,12 @@ if (params.help) exit 0, helpMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " +// Check for awsbatch profile configuration +// make sure queue is defined +if (workflow.profile == 'awsbatch') { + if(!params.awsqueue) exit 1, "Provide the job queue for aws batch!" 
+} + ch_referencesFiles = Channel.fromPath("${params.refDir}/*") /* @@ -103,7 +109,7 @@ ch_notCompressedfiles process BuildBWAindexes { tag {f_reference} - publishDir params.outDir, mode: 'link' + publishDir params.outDir, mode: params.publishDirMode input: file(f_reference) from ch_fastaForBWA @@ -125,7 +131,7 @@ if (params.verbose) bwaIndexes.flatten().view { process BuildReferenceIndex { tag {f_reference} - publishDir params.outDir, mode: 'link' + publishDir params.outDir, mode: params.publishDirMode input: file(f_reference) from ch_fastaReference @@ -149,7 +155,7 @@ if (params.verbose) ch_referenceIndex.view { process BuildSAMToolsIndex { tag {f_reference} - publishDir params.outDir, mode: 'link' + publishDir params.outDir, mode: params.publishDirMode input: file(f_reference) from ch_fastaForSAMTools @@ -170,7 +176,7 @@ if (params.verbose) ch_samtoolsIndex.view { process BuildVCFIndex { tag {f_reference} - publishDir params.outDir, mode: 'link' + publishDir params.outDir, mode: params.publishDirMode input: file(f_reference) from ch_vcfFile diff --git a/conf/aws-batch.config b/conf/aws-batch.config index 4c4c837610..33850cd686 100644 --- a/conf/aws-batch.config +++ b/conf/aws-batch.config @@ -8,19 +8,55 @@ */ params { - genome_base = params.genome == 'GRCh37' ? "s3://caw-references/grch37" : params.genome == 'GRCh38' ? "s3://caw-references/grch38" : "s3://caw-references/smallgrch37" + genome_base = params.genome == 'GRCh37' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'GRCh38' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small" + publishDirMode = 'copy' + singleCPUMem = 7.GB // To make the uppmax slurm copy paste work. 
+ localReportDir = 'Reports' } -executor.name = 'awsbatch' -executor.awscli = '/home/ec2-user/miniconda/bin/aws' +executor { + name = 'awsbatch' + awscli = '/home/ec2-user/miniconda/bin/aws' +} + +/* Rolling files are currently not supported on s3 */ +report.file = "${params.localReportDir}/Sarek_report.html" +timeline.file = "${params.localReportDir}/Sarek_timeline.html" +dag.file = "${params.localReportDir}/Sarek_DAG.svg" +trace.file = "${params.localReportDir}/Sarek_trace.txt" process { - executor = 'awsbatch' - queue = 'caw-job-queue' + queue = params.awsqueue errorStrategy = {task.exitStatus == 143 ? 'retry' : 'terminate'} maxErrors = '-1' - maxRetries = 2 + maxRetries = 4 cpus = 2 - memory = 7.GB + memory = 8.GB + + withName:RunBcftoolsStats { + cpus = 1 + memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance + // Use a tiny queue for this one, so storage doesn't run out + queue = params.awsqueue_tiny + } + withName:RunVcftools { + cpus = 1 + memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance + // Use a tiny queue for this one, so storage doesn't run out + queue = params.awsqueue_tiny + } + withName:RunHaplotypecaller { + cpus = 1 + // Increase memory quadratically + memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance + // Use a tiny queue for this one, so storage doesn't run out + queue = params.awsqueue_tiny + } + withName:RunGenotypeGVCFs { + cpus = 1 + memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance + // Use a tiny queue for this one, so storage doesn't run out + queue = params.awsqueue_tiny + } } diff --git a/conf/base.config b/conf/base.config index 02e12faa23..b890e8b076 100644 --- a/conf/base.config +++ b/conf/base.config @@ -6,7 +6,6 @@ * ------------------------------------------------- */ -includeConfig 'genomes.config' wf_repository = 'maxulysse' 
params { @@ -17,6 +16,7 @@ params { explicitBqsrNeeded = true // Enable recalibration in main.nf genome = 'GRCh38' // Default reference genome is GRCh38 help = false // Don't give help information + markdup_java_options = '"-Xms4000m -Xmx7g"' //Established values for markDuplicate memory consumption, see issue PR #689 for details max_cpus = 16 // Base specifications max_memory = 128.GB // Base specifications max_time = 240.h // Base specifications @@ -27,6 +27,7 @@ params { nucleotidesPerSecond = 1000.0 // To estimate interval size by default onlyQC = false // All process will be run and not only the QC tools outDir = "${PWD}" // Path to output directory + publishDirMode = 'link' // publishDir mode is 'link' by default push = false // Don't push container to DockerHub repository = wf_repository // DockerHub containers repository singularity = false // Don't use singularity to build buildContainers.nf @@ -36,6 +37,9 @@ params { targetBED = false // no targets by default test = false // Not testing by default verbose = false // Enable for more verbose information + awsqueue = false // Queue has to be provided when using awsbatch executor + awsqueue_tiny = params.awsqueue // A separate queue with smaller instance types + localReportDir = false // Used by AWS since reporting is not fully supported on s3 buckets } process { @@ -65,39 +69,6 @@ dag { // Turning on dag by default trace { // Turning on trace tracking by default enabled = true - fields = 'process,task_id,hash,name,attempt,status,exit,realtime,%cpu,vmem,rss,submit,start,complete,duration,realtime,rchar,wchar' + fields = 'process,task_id,hash,name,attempt,status,exit,realtime,cpus,memory,%cpu,vmem,rss,submit,start,complete,duration,realtime,rchar,wchar' file = "${params.outDir}/Reports/Sarek_trace.txt" } - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if(type == 'memory'){ - try { - if(obj.compareTo(params.max_memory as 
nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if(type == 'time'){ - try { - if(obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if(type == 'cpus'){ - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} diff --git a/conf/binac.config b/conf/binac.config index c35e58c921..b8d6e74ea3 100644 --- a/conf/binac.config +++ b/conf/binac.config @@ -10,12 +10,13 @@ singularity { } process { - beforeScript = 'module load devel/singularity/2.4.1' + beforeScript = 'module load devel/singularity/2.6.0' executor = 'pbs' queue = 'short' } params { + publishDirMode = 'symlink' max_memory = 128.GB max_cpus = 28 max_time = 48.h diff --git a/conf/cfc.config b/conf/cfc.config new file mode 100644 index 0000000000..3f67dcd75e --- /dev/null +++ b/conf/cfc.config @@ -0,0 +1,23 @@ +/* + * ------------------------------------------------------------- + * Nextflow config file for use with Singularity on CFC at QBIC + * ------------------------------------------------------------- + * Defines basic usage limits and singularity image id. + */ + +/* +*To be improved by process specific configuration asap, once our CFC cluster has the extra options removed - till then, task.attempt in NextFlow is not supported there. 
+*/ + +process { + beforeScript = 'module load qbic/singularity_slurm/2.5.2' + executor = 'slurm' +} + +params { + publishDirMode = 'symlink' + max_memory = 60.GB + max_cpus = 20 + max_time = 140.h + genome_base = '/sfs/4/qbic/references/human' +} diff --git a/conf/containers.config b/conf/containers.config index 6e0ab0a1ad..2c0c34e2a4 100644 --- a/conf/containers.config +++ b/conf/containers.config @@ -26,6 +26,9 @@ process { withName:ConcatVCF { container = "${params.repository}/sarek:${params.tag}" } + withName:CreateIntervalBeds { + container = "${params.repository}/sarek:${params.tag}" + } withName:CreateRecalibrationTable { container = "${params.repository}/sarek:${params.tag}" } @@ -62,7 +65,10 @@ process { withName:RunAscat { container = "${params.repository}/r-base:${params.tag}" } - withName:RunBamQC { + withName:RunBamQCmapped { + container = "${params.repository}/sarek:${params.tag}" + } + withName:RunBamQCrecalibrated { container = "${params.repository}/sarek:${params.tag}" } withName:RunBcftoolsStats { diff --git a/conf/genomes.config b/conf/genomes.config index 53ceecf9cf..1928b780dd 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -4,10 +4,12 @@ * ------------------------------------------------- * Path to reference files * ------------------------------------------------- - * Imported under all Nextflow profiles in + * Imported under Nextflow profiles in * nextflow.config * ------------------------------------------------- - * Modify to add specific versions of genomes + * Defines reference genomes, using paths + * Can be used by any config that customises the base + * path using $params.genome_base / --genome_base * ------------------------------------------------- */ @@ -15,8 +17,6 @@ params { genomes { 'GRCh37' { acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.loci" - cosmic = "${params.genome_base}/GRCh37_Cosmic_v83.vcf" - cosmicIndex = "${cosmic}.idx" dbsnp = "${params.genome_base}/dbsnp_138.b37.vcf" dbsnpIndex = 
"${dbsnp}.idx" genomeFile = "${params.genome_base}/human_g1k_v37_decoy.fasta" @@ -30,8 +30,6 @@ params { } 'GRCh38' { acLoci = "${params.genome_base}/1000G_phase3_GRCh38_maf0.3.loci" - cosmic = "${params.genome_base}/COSMICv80.vcf" - cosmicIndex = "${cosmic}.idx" dbsnp = "${params.genome_base}/dbsnp_146.hg38.vcf.gz" dbsnpIndex = "${dbsnp}.tbi" genomeFile = "${params.genome_base}/Homo_sapiens_assembly38.fasta" @@ -43,13 +41,11 @@ params { knownIndelsIndex = "${params.genome_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" snpeffDb = "GRCh38.86" // This a nasty-looking list of allele-frequencies files. Add/remove files to match to your sets - //AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf" - //AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx" + //AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf" + //AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx" } 'smallGRCh37' { acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci" - cosmic = "${params.genome_base}/b37_cosmic_v74.noCHR.sort.4.1.small.vcf" - cosmicIndex = "${cosmic}.idx" dbsnp = "${params.genome_base}/dbsnp_138.b37.small.vcf" dbsnpIndex = "${dbsnp}.idx" genomeFile = "${params.genome_base}/human_g1k_v37_decoy.small.fasta" diff --git a/conf/igenomes.config b/conf/igenomes.config new file mode 100644 index 0000000000..e3d7077856 --- /dev/null +++ b/conf/igenomes.config @@ -0,0 +1,58 @@ +/* + * ------------------------------------------------- + * Nextflow config file for 
Sarek + * ------------------------------------------------- + * Path to iGenomes reference files + * ------------------------------------------------- + * Imported under Nextflow profiles in + * nextflow.config + * ------------------------------------------------- + * Defines reference genomes, using iGenome paths + * Can be used by any config that customises the base + * path using $params.genome_base / --genome_base + * ------------------------------------------------- + */ + +params { + genomes { + 'GRCh37' { + acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci" + dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf" + dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx" + genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" + genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" + genomeIndex = "${params.genome_base}/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" + bwaIndex = "${params.genome_base}/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}" + intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions_CAW.list" + knownIndels = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf" + knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx" + snpeffDb = "GRCh37.75" + } + 'GRCh38' { + acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci" + dbsnp = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" + dbsnpIndex = "${params.genome_base}/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" + genomeFile = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" + genomeDict = "${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" + genomeIndex = 
"${params.genome_base}/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" + bwaIndex = "${params.genome_base}/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}" + intervals = "${params.genome_base}/Annotation/intervals/wgs_calling_regions.hg38.bed" + knownIndels = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" + knownIndelsIndex = "${params.genome_base}/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" + snpeffDb = "GRCh38.86" + } + 'smallGRCh37' { + acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci" + dbsnp = "${params.genome_base}/dbsnp_138.b37.small.vcf" + dbsnpIndex = "${dbsnp}.idx" + genomeFile = "${params.genome_base}/human_g1k_v37_decoy.small.fasta" + bwaIndex = "${genomeFile}.{amb,ann,bwt,pac,sa}" + genomeDict = "${params.genome_base}/human_g1k_v37_decoy.small.dict" + genomeIndex = "${genomeFile}.fai" + intervals = "${params.genome_base}/small.intervals" + knownIndels = "${params.genome_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf" + knownIndelsIndex = "${params.genome_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf.idx" + snpeffDb = "GRCh37.75" + } + } +} diff --git a/conf/munin.config b/conf/munin.config new file mode 100644 index 0000000000..f3a55a7b39 --- /dev/null +++ b/conf/munin.config @@ -0,0 +1,140 @@ +/* + * ------------------------------------------------- + * Nextflow config file for Sarek + * ------------------------------------------------- + * Configuration for running on munin + * ------------------------------------------------- + */ + +env { + NXF_WORK="/scratch" +} + +params { + genome_base = params.genome == 'GRCh37' ? '/btb/references/Homo_sapiens/GATK/GRCh37/' : params.genome == 'GRCh38' ? 
'/btb/references/Homo_sapiens/GATK/GRCh38/' : 'References/smallGRCh37' + singleCPUMem = 15.GB + totalMemory = 754.GB +} + +executor.$local.cpus = 48 + +process { + // Default process resources + + // A process may use one core, + cpus = 1 + + // 15 GB of memory, + memory = {params.singleCPUMem} + + // and 48 of them are allowed to be launched simultaneously. + maxForks = 48 + + errorStrategy = {task.exitStatus == 143 ? 'retry' : 'terminate'} + maxErrors = '-1' + maxRetries = 3 + + // These processes are defined in buildReferences.nf + + withName:BuildBWAindexes { + memory = {params.totalMemory} // TODO This is likely too high + } + withName:BuildReferenceIndex { + memory = {params.totalMemory} // TODO This is likely too high + } + withName:BuildSAMToolsIndex { + memory = {params.totalMemory} // TODO This is likely too high + } + withName:BuildVCFIndex { + memory = {params.totalMemory} // TODO This is likely too high + } + + // These processes are defined in main.nf + + withName:ConcatVCF { + cpus = 8 + } + withName:CreateRecalibrationTable { + cpus = 16 + memory = {params.totalMemory} + } + withName:MapReads { + cpus = 48 + memory = {params.totalMemory} + } + withName:MarkDuplicates { + // Actually the -Xmx value should be kept lower + cpus = 16 + memory = {2 * params.singleCPUMem} + } + withName:MergeBams { + cpus = 8 + memory = {params.totalMemory} + } + withName:RecalibrateBam { + memory = {params.singleCPUMem * task.attempt} + } + withName:RunAlleleCount { + memory = {params.singleCPUMem * 2 * task.attempt} + } + withName:RunAscat { + memory = {params.singleCPUMem * 2 * task.attempt} + } + withName:RunBamQCmapped { + cpus = 16 + memory = {params.totalMemory} + } + withName:RunBamQCrecalibrated { + cpus = 16 + memory = {params.totalMemory} + } + withName:RunBcftoolsStats { + } + withName:RunConvertAlleleCounts { + memory = {params.singleCPUMem * 2 * task.attempt} + } + withName:RunFastQC { + cpus = 2 // FastQC is only capable of running one thread per fastq 
file. + errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} + } + withName:RunFreeBayes { + memory = {params.singleCPUMem * task.attempt} + } + withName:RunHaplotypecaller { + // Increase memory quadratically + memory = {params.singleCPUMem * task.attempt * task.attempt} + } + withName:RunGenotypeGVCFs { + } + withName:RunManta { + cpus = 48 + memory = {params.totalMemory} + } + withName:RunMultiQC { + } + withName:RunMutect2 { + memory = {params.singleCPUMem * task.attempt} + } + withName:RunSamtoolsStats { + } + withName:RunSingleManta { + cpus = 48 + memory = {params.totalMemory} + } + withName:RunSingleStrelka { + cpus = 48 + memory = {params.totalMemory} + } + withName:RunSnpeff { + errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} + memory = {params.singleCPUMem * task.attempt} + } + withName:RunStrelka { + cpus = 48 + memory = {params.totalMemory} + } + withName:RunVEP { + cpus = 48 + errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} + } +} diff --git a/conf/resources.config b/conf/resources.config new file mode 100644 index 0000000000..1a7dc84a77 --- /dev/null +++ b/conf/resources.config @@ -0,0 +1,119 @@ +/* + * ------------------------------------------------- + * Nextflow config file for Sarek + * ------------------------------------------------- + * Generalized resource configuration for clusters + * ------------------------------------------------- + */ + +params { + singleCPUMem = 7.GB // for processes that are using more memory but a single CPU only. Use the 'core' queue for these +} + +process { + cpus = { check_max( 10, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + + errorStrategy = {task.exitStatus == 143 ? 
'retry' : 'terminate'} + maxErrors = '-1' + maxRetries = 3 + + withName:MapReads { + memory = { check_max( 60.GB * task.attempt, 'memory' ) } + cpus = { check_max( 16, 'cpus' ) } + } + withName:CreateRecalibrationTable { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 60.GB * task.attempt, 'memory') } + } + withName:MarkDuplicates { + // Actually the -Xmx value should be kept lower, + // and is set through the markdup_java_options + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + } + withName:MergeBams { + cpus = { check_max( 4, 'cpus') } + memory = {params.singleCPUMem * task.attempt} + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:RecalibrateBam { + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 7.GB * 2 * task.attempt, 'memory' ) } + time = { check_max( 10.h * task.attempt, 'time' ) } + } + withName:RunAlleleCount { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 14.GB * task.attempt, 'memory' ) } + } + withName:RunAscat { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 14.GB * task.attempt, 'memory' ) } + } + withName:RunBamQCmapped { + cpus = { check_max( 6, 'cpus' ) } + memory = { check_max( 70.GB, 'memory' ) } + } + withName:RunBamQCrecalibrated { + cpus = { check_max( 6, 'cpus' ) } + memory = { check_max( 70.GB, 'memory' ) } + } + withName:RunBcftoolsStats { + cpus = { check_max( 1, 'cpus' ) } + } + withName:RunConvertAlleleCounts { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 14.GB * task.attempt, 'memory' ) } + } + withName:RunFastQC { + cpus = { check_max( 2, 'cpus' ) } // FastQC is only capable of running one thread per fastq file. + errorStrategy = { task.exitStatus == 143 ? 
'retry' : 'ignore' } + } + withName:RunFreeBayes { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } + } + withName:RunHaplotypecaller { + cpus = { check_max( 1, 'cpus' ) } + // Increase memory quadratically + memory = { check_max( 7.GB * 2 * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:RunGenotypeGVCFs { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 7.GB * task.attempt, 'memory' ) } + } + withName:RunMultiQC { + errorStrategy = { task.exitStatus == 143 ? 'retry' : 'ignore' } + } + withName:RunMutect2 { + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 7.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:RunSamtoolsStats { + cpus = { check_max( 2, 'cpus' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:RunSingleManta { + cpus = { check_max( 20, 'cpus' ) } + memory = { check_max( 16.GB, 'memory') } + } + withName:RunSingleStrelka { + cpus = { check_max( 20, 'cpus' ) } + memory = { check_max( 16.GB, 'memory') } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:RunSnpeff { + cpus = { check_max( 1, 'cpus' ) } + } + withName:RunStrelka { + cpus = { check_max( 1, 'cpus' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:RunVEP { + cpus = { check_max( 16, 'cpus' ) } + memory = {check_max (32.GB * task.attempt, 'memory' ) } + } +} diff --git a/conf/singularity-path.config b/conf/singularity-path.config index 448e7d1432..c2a357b589 100644 --- a/conf/singularity-path.config +++ b/conf/singularity-path.config @@ -31,6 +31,9 @@ process { withName:ConcatVCF { container = "${params.containerPath}/sarek-${params.tag}.simg" } + withName:CreateIntervalBeds { + container = "${params.containerPath}/sarek-${params.tag}.simg" + } withName:CreateRecalibrationTable { container = "${params.containerPath}/sarek-${params.tag}.simg" } @@ -67,7 +70,10 @@ process { 
withName:RunAscat { container = "${params.containerPath}/r-base-${params.tag}.simg" } - withName:RunBamQC { + withName:RunBamQCmapped { + container = "${params.containerPath}/sarek-${params.tag}.simg" + } + withName:RunBamQCrecalibrated { container = "${params.containerPath}/sarek-${params.tag}.simg" } withName:RunBcftoolsStats { diff --git a/conf/uppmax-localhost.config b/conf/uppmax-localhost.config index 08052cc188..de38fb1f4b 100644 --- a/conf/uppmax-localhost.config +++ b/conf/uppmax-localhost.config @@ -12,6 +12,7 @@ env { } params { + containerPath = '/sw/data/uppnex/ToolBox/sarek' genome_base = params.genome == 'GRCh37' ? '/sw/data/uppnex/ToolBox/ReferenceAssemblies/hg38make/bundle/2.8/b37' : params.genome == 'GRCh38' ? '/sw/data/uppnex/ToolBox/hg38bundle' : 'References/smallGRCh37' singleCPUMem = 8.GB totalMemory = 104.GB // change to 240 on irma @@ -84,7 +85,11 @@ process { withName:RunAscat { memory = {params.singleCPUMem * 2 * task.attempt} } - withName:RunBamQC { + withName:RunBamQCmapped { + cpus = 16 + memory = {params.totalMemory} + } + withName:RunBamQCrecalibrated { cpus = 16 memory = {params.totalMemory} } @@ -128,15 +133,16 @@ process { memory = {params.totalMemory} } withName:RunSnpeff { + cpus = 1 errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} - memory = {params.totalMemory} // TODO Does SnpEff really require that much? + memory = {params.singleCPUMem * task.attempt} } withName:RunStrelka { cpus = 16 memory = {params.totalMemory} } withName:RunVEP { + cpus = 16 errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} - memory = {params.totalMemory} // TODO Does VEP really require that much? } } diff --git a/conf/uppmax-slurm.config b/conf/uppmax-slurm.config index 2f8f78ad79..a4f5e7d043 100644 --- a/conf/uppmax-slurm.config +++ b/conf/uppmax-slurm.config @@ -7,11 +7,15 @@ */ params { + containerPath = '/sw/data/uppnex/ToolBox/sarek' genome_base = params.genome == 'GRCh37' ? 
'/sw/data/uppnex/ToolBox/ReferenceAssemblies/hg38make/bundle/2.8/b37' : params.genome == 'GRCh38' ? '/sw/data/uppnex/ToolBox/hg38bundle' : 'References/smallGRCh37' runTime = 48.h singleCPUMem = 7.GB // for processes that are using more memory but a single CPU only. Use the 'core' queue for these } +// Extended set of fields, e.g. native_id, cpu and memory: +trace.fields = 'process,task_id,hash,name,native_id,attempt,status,exit,realtime,cpus,memory,%cpu,vmem,rss,submit,start,complete,duration,realtime,rchar,wchar' + process { clusterOptions = {"-A $params.project"} cpus = 16 @@ -71,7 +75,11 @@ process { memory = {params.singleCPUMem * 2 * task.attempt} queue = 'core' } - withName:RunBamQC { + withName:RunBamQCmapped { + cpus = 16 + } + withName:RunBamQCrecalibrated { + cpus = 16 } withName:RunBcftoolsStats { cpus = 1 @@ -129,7 +137,6 @@ process { time = {params.runTime * task.attempt} } withName:RunVEP { - cpus = 1 errorStrategy = { task.exitStatus == 143 ? 'retry' : 'ignore' } } } diff --git a/containers/vepgrch38/Dockerfile b/containers/vepgrch38/Dockerfile index 04185c8f13..da3a810a78 100644 --- a/containers/vepgrch38/Dockerfile +++ b/containers/vepgrch38/Dockerfile @@ -8,7 +8,7 @@ LABEL \ # Setup ENV variables ENV \ GENOME=GRCh38 \ - VEP_VERSION=91 + VEP_VERSION=92 # Download Genome RUN \ diff --git a/docs/CONFIG.md b/docs/CONFIG.md index 3f973ee90e..df32a2b9c2 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -13,6 +13,22 @@ The standard ones are designed to work on a Swedish UPPMAX cluster, but can be m Every configuration file can be modified for your own use. If you want you can specify the use of a config file using `-c ` +### [`aws-batch.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/aws-batch.config) + +Designed for usage with AWS batch. + +### [`base.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/base.config) + +Define default parameters, is included into every profiles. 
+ +### [`binac.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/binac.config) + +Define usage limits and Singularity for BINAC cluster in Tuebingen. + +### [`cfc.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/cfc.config) + +Designed for usage with Singularity on CFC at QBic. + ### [`containers.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/containers.config) Define Containers for all process. @@ -21,15 +37,27 @@ Use in your own profile if needed. ### [`docker.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/docker.config) -Define Docker Containers for all process. -Images will be pulled automatically. -Use in your own profile if needed. +Specify Docker options. +To be used with [`containers.config`](#containersconfig) ### [`genomes.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/genomes.config) Contain path to all references. Modify it if you want to change genome version, or the path to your references files. +### [`igenomes.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/igenomes.config) + +Contain path to all AWS iGenomes references. +Modify it if you want to change genome version, or the path to your references files. + +### [`munin.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/munin.config) + +Define usage limits and Singularity for munin server at BTB. + +### [`resources.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/resources.config) + +Define Generalized resource configuration for clusters. + ### [`singularity-path.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/singularity-path.config) Define path to Singularity Containers for all process. @@ -39,14 +67,17 @@ You need to set them up before. ### [`singularity.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/singularity.config) -Define Singularity Containers for all process. -Images will be pulled automatically. -Use in your own profile if needed. 
+Specify Singularity options. +To be used with [`containers.config`](#containersconfig) ### [`travis.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/travis.config) To be used for Travis (2 cpus) or on small computer for testing purpose +### [`uppmax-localhost.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/uppmax-localhost.config) + +Local configuration for a UPPMAX cluster +To be run on a single node ### [`uppmax-slurm.config`](https://github.com/SciLifeLab/Sarek/blob/master/conf/uppmax-slurm.config) Slurm configuration for a UPPMAX cluster @@ -59,17 +90,47 @@ The default profile is `standard`, but Sarek has multiple predefined profiles wh ```bash nextflow run SciLifeLab/Sarek --sample mysample.tsv -profile myprofile ``` +awsbatch { +binac { +btb { +cfc { +docker { +singularity { +singularityPath { +slurm { +slurmDownload { +standard { + +### `awsbatch` +This is the profile for use with AWS Batch. + +### `binac` + +This is the profile for use on the german BinAC cluster. + +### `btb` + +This is the profile for use on the BTB server munin. + +### `cfc` + +This is the profile for use on the CFC cluster in Tuebingen. ### `docker` This is the profile for docker testing on a small machine, or on Travis CI. Docker images will be pulled automatically. -### `standard` +### `singularity` -This is the default profile for use on a localhost on a UPPMAX cluster with Singularity. -Singularity images need to be set up. +This is the profile for Singularity testing on a small machine, or on Travis CI. +Singularity images will be pulled automatically. + +### `singularityPath` + +This is the profile for Singularity testing on a small machine. +Singularity images needs to be set up. ### `slurm` @@ -83,10 +144,10 @@ This is another profile for use on a UPPMAX cluster using the job scheduler slur Will run the workflow on `/scratch`. Singularity images will be pulled automatically. 
-### `singularity` +### `standard` -This is the profile for Singularity testing on a small machine, or on Travis CI. -Singularity images will be pulled automatically. +This is the default profile for use on a localhost on a UPPMAX cluster with Singularity. +Singularity images need to be set up. ## Customisation The recommended way to use custom settings is to supply Sarek with an additional configuration file. You can use the files in the [`conf/`](https://github.com/SciLifeLab/Sarek/tree/master/conf) directory as an inspiration to make this new `.config` file and specify it using the `-c` flag: diff --git a/docs/CONTAINERS.md b/docs/CONTAINERS.md index f223972127..19dfe39df2 100644 --- a/docs/CONTAINERS.md +++ b/docs/CONTAINERS.md @@ -120,8 +120,8 @@ We provide script to build/push or pull all containers ### vepgrch38 [![vepgrch38-docker status][vepgrch38-docker-badge]][vepgrch38-docker-link] -- Based on `willmclaren/ensembl-vep:release_90.6` -- Contain **[VEP][vep-link]** 90.5 +- Based on `willmclaren/ensembl-vep:release_92` +- Contain **[VEP][vep-link]** 92 - Contain GRCh38 [allelecount-link]: https://github.com/cancerit/alleleCount diff --git a/docs/INSTALL_BIANCA.md b/docs/INSTALL_BIANCA.md index 2137a24032..d7ddb71f87 100644 --- a/docs/INSTALL_BIANCA.md +++ b/docs/INSTALL_BIANCA.md @@ -58,10 +58,11 @@ For more information about using Singularity with UPPMAX, follow the [Singularit ## Install Sarek Sarek use Singularity containers to package all the different tools. +All containers are already stored on UPPMAX. -As `bianca` is secure, no direct download is available, so Sarek and the Singularity containers will have to be installed and updated manually. +As `bianca` is secure, no direct download is available, so Sarek will have to be installed and updated manually. 
-You can either download Sarek and the containers on your computer (you will need Nextflow and Singularity for that) or on `rackham`, make an archive, and send it to `bianca` using `FileZilla` or `sftp` given your preferences. +You can either download Sarek on your computer or on `rackham`, make an archive, and send it to `bianca` using `FileZilla` or `sftp` given your preferences. All Reference files are already stored in `bianca`. @@ -108,18 +109,6 @@ Wrote Sarek-[snapID].tar.gz > put Sarek-[snapID].tar.gz > exit -# To get the containers -# This script will need Singularity and Nextflow installed -# If executed on Rackham: The script needs to be started from an interactive session -# with at least two cores and approximately 3 hours. The scripts will write about -# 12 Gb data to ~/.singularity, so this amount of disk space needs to be available -# in the users home directory on Rackham. -# -> ./scripts/do_all.sh --pull --tag - -# Send the containers to bianca using the same method -# They will be in the containers/ directory as .img files - # The archive will be in the wharf folder in your user home on your bianca project # Connect to bianca diff --git a/docs/INSTALL_RACKHAM.md b/docs/INSTALL_RACKHAM.md index ab79efb4b3..8d6101b164 100644 --- a/docs/INSTALL_RACKHAM.md +++ b/docs/INSTALL_RACKHAM.md @@ -13,10 +13,7 @@ The Reference files are already stored in `rackham`. Nextflow will automatically fetch Sarek from GitHub when launched if `SciLifeLab/Sarek` is specified as the workflow name. Sarek use Singularity containers to package all the different tools. - -On `rackham` you do have the possibility to use the automatic pull of the containers. - -You can choose a specific location to store these, otherwise they will be stored in the directory where you're running Sarek. +All containers are already stored on UPPMAX. 
## Test Sarek with small dataset and small reference diff --git a/docs/PARAMETERS.md b/docs/PARAMETERS.md index 399fade15c..4255ad5aff 100644 --- a/docs/PARAMETERS.md +++ b/docs/PARAMETERS.md @@ -21,6 +21,21 @@ Choose an output directory Specify a project number ID on a UPPMAX cluster. (optional if not on such a cluster) +## --publishDirMode + +Specify wich mode `publishDir` directive need to follow, in Sarek link is the default mode. + +From [Nextflow documentation](https://www.nextflow.io/docs/latest/process.html#publishdir): + +| Mode | Description | +|--------------|-------------| +| copy | Copies the output files into the published directory | +| copyNoFollow | Copies the output files into the published directory without following symlinks ie. copies the links themselves | +| link | Creates a hard link in the published directory for each process output file (default) | +| move | Moves the output files into the published directory. Note: this is only supposed to be used for a terminating process i.e. a process whose output is not consumed by any other downstream process | +| rellink | Creates a relative symbolic link in the published directory for each process output file | +| symlink | Creates an absolute symbolic link in the published directory for each process output file | + ### --sample `file.tsv` Use the given TSV file as sample (cf [TSV documentation](TSV.md)). @@ -43,6 +58,18 @@ Possible values are: `--tools` option is case insensitive to avoid easy introduction of errors when choosing tools. So you can write `--tools mutect2,ascat` or `--tools MuTect2,ASCAT` without worrying about case sensitivity. +### --awsqueue `BatchQueueName` + +Only required if you use the awsbatch profile. This parameter specifies the queue for which jobs are submitted in AWS Batch. + +### --awsqueue_tiny `BatchQueueName` + +Only used if you use the awsbatch profile. 
This parameter specifies a queue used for certain small jobs that might still require a significant amount of disk storage. + +### --localReportDir `Directory` + +Only used if you use the awsbatch profile. This parameter specifies an output directory for nextflow reports, such as Sarek_timeline.html, which currently is not fully supported to store on s3. + ### --verbose Display more information about files being processed. diff --git a/docs/REFERENCES.md b/docs/REFERENCES.md index 8e0ad12ce9..d3625c56fe 100644 --- a/docs/REFERENCES.md +++ b/docs/REFERENCES.md @@ -1,10 +1,13 @@ # Genomes and reference files -Sarek currently uses GRCh38 by default. The settings are in `genomes.config`, they can be tailored to your needs. The [`buildReferences.nf`](#buildreferencesnf) script can be use to build the indexes based on the reference files. +Sarek currently uses GRCh38 by default. +The settings are in `genomes.config`, they can be tailored to your needs. +The [`buildReferences.nf`](#buildreferencesnf) script is used to build the indexes for the reference test. ## GRCh37 -Use `--genome GRCh37` to map against GRCh37. Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs. +Use `--genome GRCh37` to map against GRCh37. +Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs. ### GATK bundle @@ -17,20 +20,64 @@ The following files need to be downloaded: - dd05833f18c22cc501e3e31406d140b0 - 'human\_g1k\_v37\_decoy.fasta.gz' - a0764a80311aee369375c5c7dda7e266 - 'Mills\_and\_1000G\_gold\_standard.indels.b37.vcf.gz' -### Other files +### Other files for GRCh37 -From our repo, get the [`intervals` list file](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/repeats/wgs_calling_regions.grch37.list). 
More information about this file in the [intervals documentation](INTERVALS.md) +From our repo, get the [`intervals` list file](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/repeats/wgs_calling_regions.grch37.list). +More information about this file in the [intervals documentation](INTERVALS.md) Description of how to generate the Loci file used in the ASCAT process is described [here](https://github.com/SciLifeLab/Sarek/blob/master/docs/ASCAT.md). -You can create your own cosmic reference for any human reference as specified below. +You can create your own cosmic reference for any human reference as specified below in the Cosmic section. -### COSMIC files +## GRCh38 + +Use `--genome GRCh38` to map against GRCh38. +Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs. + +To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/). +You can also download the required files from the Google Cloud mirror link [here](https://console.cloud.google.com/storage/browser/genomics-public-data/resources/broad/hg38/v0). + +The MD5SUM of `Homo_sapiens_assembly38.fasta` included in that file is 7ff134953dcca8c8997453bbb80b6b5e. + +If you download the data from the FTP servers `beta/` directory, which seems to be an older version of the bundle, only `Homo_sapiens_assembly38.known_indels.vcf` is needed. +Also, you can omit `dbsnp_138_` and `dbsnp_144` files as we use `dbsnp_146`. +The old ones also use the wrong chromosome naming convention. +The Google Cloud mirror has all data in the `v0` directory, but requires you to remove the `resources_broad_hg38_v0_` prefixes from all files. 
+ +The following files need to be downloaded: + +- 3884c62eb0e53fa92459ed9bff133ae6 - 'Homo_sapiens_assembly38.dict' +- 7ff134953dcca8c8997453bbb80b6b5e - 'Homo_sapiens_assembly38.fasta' +- b07e65aa4425bc365141756f5c98328c - 'Homo_sapiens_assembly38.fasta.64.alt' +- e4dc4fdb7358198e0847106599520aa9 - 'Homo_sapiens_assembly38.fasta.64.amb' +- af611ed0bb9487fb1ba4aa1a7e7ad21c - 'Homo_sapiens_assembly38.fasta.64.ann' +- d41d8cd98f00b204e9800998ecf8427e - 'Homo_sapiens_assembly38.fasta.64.bwt' +- 178862a79b043a2f974ef10e3877ef86 - 'Homo_sapiens_assembly38.fasta.64.pac' +- 91a5d5ed3986db8a74782e5f4519eb5f - 'Homo_sapiens_assembly38.fasta.64.sa' +- f76371b113734a56cde236bc0372de0a - 'Homo_sapiens_assembly38.fasta.fai' +- 14cc588a271951ac1806f9be895fb51f - 'Homo_sapiens_assembly38.known_indels.vcf.gz' +- 1a55fdfa6533ae5cbc70e8188e779229 - 'Homo_sapiens_assembly38.known_indels.vcf.gz.tbi' +- 2e02696032dcfe95ff0324f4a13508e3 - 'Mills_and_1000G_gold_standard.indels.hg38.vcf.gz' +- 4c807e2cbe0752c0c44ac82ff3b52025 - 'Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi' + +If you just downloaded the `Homo_sapiens_assembly38.fasta.gz` file, you would need to do: + +``` +gunzip Homo_sapiens_assembly38.fasta.gz +bwa index -6 Homo_sapiens_assembly38.fasta +``` + +Description of how to generate the Loci file used in the ASCAT process is described [here](https://github.com/SciLifeLab/Sarek/blob/master/docs/ASCAT.md). + +You can create your own cosmic reference for any human reference as specified below in the Cosmic section. + +## COSMIC files To annotate with COSMIC variants during MuTect1/2 Variant Calling you need to create a compatible VCF file. Download the coding and non-coding VCF files from [COSMIC](http://cancer.sanger.ac.uk/cosmic/download) and process them with the [Create\_Cosmic.sh](https://github.com/SciLifeLab/Sarek/tree/master/scripts/Create_Cosmic.sh) -script. The script requires a fasta index `.fai`, of the reference file you are using. 
+script for either GRCh37 or GRCh38. +The script requires a fasta index `.fai`, of the reference file you are using. Example: @@ -47,26 +94,14 @@ To index the resulting VCF file use [igvtools](https://software.broadinstitute.o igvtools index ``` -## GRCh38 - -Use `--genome GRCh38` to map against GRCh38. Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs. - -To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/). - -The MD5SUM of `Homo_sapiens_assembly38.fasta` included in that file is 7ff134953dcca8c8997453bbb80b6b5e. - -From the `beta/` directory, which seems to be an older version of the bundle, only `Homo_sapiens_assembly38.known_indels.vcf` is needed. Also, you can omit `dbsnp_138_` and `dbsnp_144` files as we use `dbsnp_146`. The old ones also use the wrong chromosome naming convention. - -Afterwards, the following needs to be done: - -``` -gunzip Homo_sapiens_assembly38.fasta.gz -bwa index -6 Homo_sapiens_assembly38.fasta -``` - ## smallGRCh37 -Use `--genome smallGRCh37` to map against a small reference genome based on GRCh37. `smallGRCh37` is the default genome for the testing profile (`-profile testing`). +Use `--genome smallGRCh37` to map against a small reference genome based on GRCh37. +`smallGRCh37` is the default genome for the testing profile (`-profile testing`). + +## AWS iGenomes +Sarek is using [AWS iGenomes](https://ewels.github.io/AWS-iGenomes/), which facilitate storing and sharing references. +Both `GRCh37` and `GRCh38` are available with `--genome GRCh37` or `--genome GRCh38` respectively with any profile using the `conf/igenomes.config` file (eg.: `awsbatch`, or `btb`), or you can specify it with `-c conf/igenomes.config`, it contains all data previously detailed. 
## buildReferences.nf diff --git a/docs/images/logos/Sarek Exome/Sarek_exome.svg b/docs/images/logos/Sarek Exome/Sarek_exome.svg new file mode 100644 index 0000000000..63ab7420f7 --- /dev/null +++ b/docs/images/logos/Sarek Exome/Sarek_exome.svg @@ -0,0 +1,227 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/logos/Sarek Germline/Sarek_germline.svg b/docs/images/logos/Sarek Germline/Sarek_germline.svg new file mode 100644 index 0000000000..13a3447c10 --- /dev/null +++ b/docs/images/logos/Sarek Germline/Sarek_germline.svg @@ -0,0 +1,242 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/logos/Sarek Somatic/Sarek_somatic.svg b/docs/images/logos/Sarek Somatic/Sarek_somatic.svg new file mode 100644 index 0000000000..75b75fc784 --- /dev/null +++ b/docs/images/logos/Sarek Somatic/Sarek_somatic.svg @@ -0,0 +1,232 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/logos/Sarek/Sarek_color.svg b/docs/images/logos/Sarek/Sarek_color.svg new file mode 100644 index 0000000000..207b7297b8 --- /dev/null +++ b/docs/images/logos/Sarek/Sarek_color.svg @@ -0,0 +1,201 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/logos/Sarek/Sarek_dark_color.svg b/docs/images/logos/Sarek/Sarek_dark_color.svg new file mode 100644 index 0000000000..914c3e7e51 --- /dev/null +++ b/docs/images/logos/Sarek/Sarek_dark_color.svg @@ -0,0 +1,393 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/docs/images/logos/Sarek/Sarek_dark_grey.svg b/docs/images/logos/Sarek/Sarek_dark_grey.svg new file mode 100644 index 0000000000..c000ed4b87 --- /dev/null +++ b/docs/images/logos/Sarek/Sarek_dark_grey.svg @@ -0,0 +1,401 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/logos/Sarek/Sarek_dark_mono.svg b/docs/images/logos/Sarek/Sarek_dark_mono.svg new file mode 100644 index 0000000000..aab15bcd48 --- /dev/null +++ b/docs/images/logos/Sarek/Sarek_dark_mono.svg @@ -0,0 +1,193 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/logos/Sarek/Sarek_grey.svg b/docs/images/logos/Sarek/Sarek_grey.svg new file mode 100644 index 0000000000..e78e5db9be --- /dev/null +++ b/docs/images/logos/Sarek/Sarek_grey.svg @@ -0,0 +1,409 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/Sarek_logo_dark_background.svg b/docs/images/logos/Sarek/Sarek_mono.svg similarity index 65% rename from docs/images/Sarek_logo_dark_background.svg rename to docs/images/logos/Sarek/Sarek_mono.svg index 212539c56c..89d79dab16 100644 --- a/docs/images/Sarek_logo_dark_background.svg +++ b/docs/images/logos/Sarek/Sarek_mono.svg @@ -15,7 +15,7 @@ id="svg4924" version="1.1" inkscape:version="0.91 r13725" - sodipodi:docname="Sarek_logo_dark_background.svg" + sodipodi:docname="Sarek_mono.svg" inkscape:export-filename="Sarek_germline_logo.png" inkscape:export-xdpi="90" inkscape:export-ydpi="90"> @@ -101,11 +101,11 @@ borderopacity="1.0" inkscape:pageopacity="0.0" inkscape:pageshadow="2" - inkscape:zoom="1.979899" - inkscape:cx="147.3736" - inkscape:cy="82.822455" + inkscape:zoom="2.8" + inkscape:cx="154.50915" + inkscape:cy="75.83742" 
inkscape:document-units="px" - inkscape:current-layer="layer1" + inkscape:current-layer="layer4" showgrid="false" fit-margin-top="0" fit-margin-left="0" @@ -114,9 +114,9 @@ units="px" width="200mm" inkscape:window-width="1920" - inkscape:window-height="1015" - inkscape:window-x="2120" - inkscape:window-y="264" + inkscape:window-height="1007" + inkscape:window-x="0" + inkscape:window-y="0" inkscape:window-maximized="1" /> @@ -126,7 +126,7 @@ image/svg+xml - + @@ -328,47 +328,63 @@ inkscape:label="Sarek" style="display:inline" transform="translate(2.7783959e-5,59.99996)"> - Sarek - - - - + + + + + + + + + + + + + + + + diff --git a/environment.yml b/environment.yml index c2546d6976..0ffad9987b 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: sarek-2.2.1 +name: sarek-2.2.2 channels: - conda-forge - bioconda @@ -9,9 +9,11 @@ channels: dependencies: - bcftools=1.8 - bwa=0.7.17 - - fastqc=0.11.7 + - fastqc=0.11.8 + - font-ttf-dejavu-sans-mono=2.37 #for FastQC + - fontconfig=2.12.6 #for FastQC - freebayes=1.2.0 - - gatk4=4.0.6.0 + - gatk4=4.0.9.0 - htslib=1.9 - igvtools=2.3.93 - manta=1.4.0 diff --git a/germlineVC.nf b/germlineVC.nf index 1cc9f39ddc..7c32373d6e 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -26,8 +26,6 @@ kate: syntax groovy; space-indent on; indent-width 2; https://github.com/SciLifeLab/Sarek/README.md -------------------------------------------------------------------------------- Processes overview - - RunSamtoolsStats - Run Samtools stats on recalibrated BAM files - - RunBamQC - Run qualimap BamQC on recalibrated BAM files - CreateIntervalBeds - Create and sort intervals into bed files - RunHaplotypecaller - Run HaplotypeCaller for Germline Variant Calling (Parallelized processes) - RunGenotypeGVCFs - Run HaplotypeCaller for Germline Variant Calling (Parallelized processes) @@ -45,6 +43,12 @@ if (params.help) exit 
0, helpMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " +// Check for awsbatch profile configuration +// make sure queue is defined +if (workflow.profile == 'awsbatch') { + if(!params.awsqueue) exit 1, "Provide the job queue for aws batch!" +} + tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] directoryMap = SarekUtils.defineDirectoryMap(params.outDir) @@ -89,7 +93,7 @@ if (params.verbose) bamFiles = bamFiles.view { } // assume input is recalibrated, ignore explicitBqsrNeeded -(bamForBamQC, bamForSamToolsStats, recalibratedBam, recalTables) = bamFiles.into(4) +(recalibratedBam, recalTables) = bamFiles.into(2) recalTables = recalTables.map{ it + [null] } // null recalibration table means: do not use --BQSR @@ -101,48 +105,6 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { Files : [${it[3].fileName}, ${it[4].fileName}]" } -process RunSamtoolsStats { - tag {idPatient + "-" + idSample} - - publishDir directoryMap.samtoolsStats, mode: 'link' - - input: - set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats - - output: - file ("${bam}.samtools.stats.out") into samtoolsStatsReport - - when: !params.noReports - - script: QC.samtoolsStats(bam) -} - -if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { - "SAMTools stats report:\n\ - File : [${it.fileName}]" -} - -process RunBamQC { - tag {idPatient + "-" + idSample} - - publishDir directoryMap.bamQC, mode: 'link' - - input: - set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC - - output: - file(idSample) into bamQCreport - - when: !params.noReports && !params.noBAMQC - - script: QC.bamQC(bam,idSample,task.memory) -} - -if (params.verbose) bamQCreport = bamQCreport.view { - "BamQC report:\n\ - Dir : [${it.fileName}]" -} - // Here we have a recalibrated bam set, but we need to 
separate the bam files based on patient status. // The sample tsv config file which is formatted like: "subject status sample lane fastq1 fastq2" // cf fastqFiles channel, I decided just to add _status to the sample name to have less changes to do. @@ -356,7 +318,7 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "-" + idSampleNormal} - publishDir "${directoryMap."$variantCaller"}", mode: 'link' + publishDir "${directoryMap."$variantCaller"}", mode: params.publishDirMode input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -394,7 +356,7 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunSingleStrelka { tag {idSample} - publishDir directoryMap.strelka, mode: 'link' + publishDir directoryMap.strelka, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleStrelka @@ -447,7 +409,7 @@ if (params.verbose) singleStrelkaOutput = singleStrelkaOutput.view { process RunSingleManta { tag {idSample + " - Single Diploid"} - publishDir directoryMap.manta, mode: 'link' + publishDir directoryMap.manta, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -511,7 +473,7 @@ vcfForQC = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir directoryMap.bcftoolsStats, mode: 'link' + publishDir directoryMap.bcftoolsStats, mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForBCFtools @@ -534,7 +496,7 @@ bcfReport.close() process RunVcftools { tag {vcf} - publishDir directoryMap.vcftools, mode: 'link' + publishDir directoryMap.vcftools, mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForVCFtools diff --git a/lib/QC.groovy b/lib/QC.groovy index 2e1c20f820..409a5a27d4 100644 --- a/lib/QC.groovy +++ b/lib/QC.groovy @@ -1,15 +1,4 @@ class QC { -// Run bamQC on vcf file - static def bamQC(bam, 
idSample, mem) { - """ - qualimap --java-mem-size=${mem.toGiga()}G \ - bamqc \ - -bam ${bam} \ - -outdir ${idSample} \ - -outformat HTML - """ - } - // Run bcftools on vcf file static def bcftools(vcf) { """ diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy index 7b3f33df35..0b3931c014 100644 --- a/lib/SarekUtils.groovy +++ b/lib/SarekUtils.groovy @@ -35,6 +35,8 @@ class SarekUtils { 'annotate-VCF', 'annotateTools', 'annotateVCF', + 'awsqueue', + 'awsqueue_tiny', 'build', 'call-name', 'callName', @@ -51,6 +53,9 @@ class SarekUtils { 'genome', 'genomes', 'help', + 'localReportDir', + 'local-report-dir', + 'markdup_java_options', 'max_cpus', 'max_memory', 'max_time', @@ -71,6 +76,8 @@ class SarekUtils { 'outDir', 'params', 'project', + 'publish-dir-mode', + 'publishDirMode', 'push', 'ref-dir', 'refDir', diff --git a/main.nf b/main.nf index f130b42e9d..866221bdfa 100644 --- a/main.nf +++ b/main.nf @@ -33,12 +33,20 @@ kate: syntax groovy; space-indent on; indent-width 2; - CreateRecalibrationTable - Create Recalibration Table with BaseRecalibrator - RecalibrateBam - Recalibrate Bam with PrintReads - RunSamtoolsStats - Run Samtools stats on recalibrated BAM files - - RunBamQC - Run qualimap BamQC on recalibrated BAM files + - RunBamQCmapped - Run qualimap BamQC on mapped BAM files + - RunBamQCrecalibrated - Run qualimap BamQC on recalibrated BAM files ================================================================================ = C O N F I G U R A T I O N = ================================================================================ */ +// Check for awsbatch profile configuration +// make sure queue is defined +if (workflow.profile == 'awsbatch') { + if(!params.awsqueue) exit 1, "Provide the job queue for aws batch!" +} + + if (params.help) exit 0, helpMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " @@ -73,7 +81,6 @@ if (!params.sample && !params.sampleDir) { } // Set up the fastqFiles and bamFiles channels. One of them remains empty -// Except for step annotate, in which both stay empty fastqFiles = Channel.empty() bamFiles = Channel.empty() if (tsvPath) { @@ -123,7 +130,7 @@ if (params.verbose) bamFiles = bamFiles.view { process RunFastQC { tag {idPatient + "-" + idRun} - publishDir "${directoryMap.fastQC}/${idRun}", mode: 'link' + publishDir "${directoryMap.fastQC}/${idRun}", mode: params.publishDirMode input: set idPatient, status, idSample, idRun, file(fastqFile1), file(fastqFile2) from fastqFilesforFastQC @@ -152,7 +159,7 @@ process MapReads { set file(genomeFile), file(bwaIndex) from Channel.value([referenceMap.genomeFile, referenceMap.bwaIndex]) output: - set idPatient, status, idSample, idRun, file("${idRun}.bam") into mappedBam + set idPatient, status, idSample, idRun, file("${idRun}.bam") into (mappedBam, mappedBamForQC) when: step == 'mapping' && !params.onlyQC @@ -174,6 +181,40 @@ if (params.verbose) mappedBam = mappedBam.view { File : [${it[4].fileName}]" } +process RunBamQCmapped { + tag {idPatient + "-" + idSample} + + publishDir directoryMap.bamQC, mode: params.publishDirMode + + input: + set idPatient, status, idSample, idRun, file(bam) from mappedBamForQC + + output: + file(idSample) into bamQCmappedReport + + when: !params.noReports && !params.noBAMQC + + script: + """ + qualimap --java-mem-size=${task.memory.toGiga()}G \ + bamqc \ + -bam ${bam} \ + --paint-chromosome-limits \ + --genome-gc-distr HUMAN \ + -nt ${task.cpus} \ + -skip-duplicated \ + --skip-dup-mode 0 \ + -outdir ${idSample} \ + -outformat HTML + """ +} + +if (params.verbose) bamQCmappedReport = bamQCmappedReport.view { + "BamQC report:\n\ + Dir : [${it.fileName}]" +} + + // Sort bam whether they are standalone or should be merged // Borrowed code from https://github.com/guigolab/chip-nf @@ -226,29 +267,29 @@ if (params.verbose) mergedBam = mergedBam.view { 
process MarkDuplicates { tag {idPatient + "-" + idSample} - publishDir params.outDir, mode: 'link', + publishDir params.outDir, mode: params.publishDirMode, saveAs: { - if (it == "${bam}.metrics") "${directoryMap.markDuplicatesQC}/${it}" - else "${directoryMap.duplicateMarked}/${it}" + if (it == "${idSample}.bam.metrics") "${directoryMap.markDuplicatesQC.minus(params.outDir+'/')}/${it}" + else "${directoryMap.duplicateMarked.minus(params.outDir+'/')}/${it}" } input: - set idPatient, status, idSample, file(bam) from mergedBam + set idPatient, status, idSample, file("${idSample}.bam") from mergedBam output: set idPatient, file("${idSample}_${status}.md.bam"), file("${idSample}_${status}.md.bai") into duplicateMarkedBams set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), val("${idSample}_${status}.md.bai") into markDuplicatesTSV - file ("${bam}.metrics") into markDuplicatesReport + file ("${idSample}.bam.metrics") into markDuplicatesReport when: step == 'mapping' && !params.onlyQC script: """ - gatk --java-options -Xmx${task.memory.toGiga()}g \ + gatk --java-options ${params.markdup_java_options} \ MarkDuplicates \ --MAX_RECORDS_IN_RAM 50000 \ - --INPUT ${bam} \ - --METRICS_FILE ${bam}.metrics \ + --INPUT ${idSample}.bam \ + --METRICS_FILE ${idSample}.bam.metrics \ --TMP_DIR . 
\ --ASSUME_SORT_ORDER coordinate \ --CREATE_INDEX true \ @@ -283,7 +324,7 @@ if (params.verbose) duplicateMarkedBams = duplicateMarkedBams.view { process CreateRecalibrationTable { tag {idPatient + "-" + idSample} - publishDir directoryMap.duplicateMarked, mode: 'link', overwrite: false + publishDir directoryMap.duplicateMarked, mode: params.publishDirMode, overwrite: false input: set idPatient, status, idSample, file(bam), file(bai) from mdBam // realignedBam @@ -311,7 +352,7 @@ process CreateRecalibrationTable { BaseRecalibrator \ --input ${bam} \ --output ${idSample}.recal.table \ - --TMP_DIR /tmp \ + --tmp-dir /tmp \ -R ${genomeFile} \ -L ${intervals} \ --known-sites ${dbsnp} \ @@ -350,7 +391,7 @@ recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status process RecalibrateBam { tag {idPatient + "-" + idSample} - publishDir directoryMap.recalibrated, mode: 'link' + publishDir directoryMap.recalibrated, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable @@ -377,8 +418,8 @@ process RecalibrateBam { --input ${bam} \ --output ${idSample}.recal.bam \ -L ${intervals} \ - --create-output-bam-index true \ - --bqsr-recal-file ${recalibrationReport} + --create-output-bam-index true \ + --bqsr-recal-file ${recalibrationReport} """ } // Creating a TSV file to restart from this step @@ -398,7 +439,7 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir directoryMap.samtoolsStats, mode: 'link' + publishDir directoryMap.samtoolsStats, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -416,23 +457,35 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { File : [${it.fileName}]" } -process RunBamQC { +process RunBamQCrecalibrated { tag {idPatient + "-" + idSample} - publishDir directoryMap.bamQC, mode: 'link' + 
publishDir directoryMap.bamQC, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC output: - file(idSample) into bamQCreport + file(idSample) into bamQCrecalibratedReport when: !params.noReports && !params.noBAMQC - script: QC.bamQC(bam,idSample,task.memory) + script: + """ + qualimap --java-mem-size=${task.memory.toGiga()}G \ + bamqc \ + -bam ${bam} \ + --paint-chromosome-limits \ + --genome-gc-distr HUMAN \ + -nt ${task.cpus} \ + -skip-duplicated \ + --skip-dup-mode 0 \ + -outdir ${idSample} \ + -outformat HTML + """ } -if (params.verbose) bamQCreport = bamQCreport.view { +if (params.verbose) bamQCrecalibratedReport = bamQCrecalibratedReport.view { "BamQC report:\n\ Dir : [${it.fileName}]" } diff --git a/nextflow.config b/nextflow.config index c715feacc8..f8eca2dc6b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,7 @@ manifest { mainScript = 'main.nf' name = 'Sarek' nextflowVersion = '>=0.32.0' - version = '2.2.1' + version = '2.2.2' } env { @@ -23,51 +23,59 @@ env { } profiles { - // Default profile for UPPMAX secure clusters - // Runs the pipeline locally on a single 16-core node - // Singularity images need to be set up - standard { + // AWS Batch with Docker profile + // Docker images will be pulled automatically + awsbatch { includeConfig 'conf/base.config' - includeConfig 'conf/uppmax-localhost.config' - includeConfig 'conf/singularity-path.config' + includeConfig 'conf/igenomes.config' + includeConfig 'conf/aws-batch.config' + includeConfig 'conf/docker.config' + includeConfig 'conf/resources.config' + includeConfig 'conf/containers.config' } - // slurm profile for UPPMAX secure clusters - // Runs the pipeline using the job scheduler + // Default config for german BinAC cluster + // Runs the pipeline using the pbs executor + // Singularity images will be pulled automatically + binac { + includeConfig 'conf/base.config' + includeConfig 'conf/genomes.config' + includeConfig 
'conf/binac.config' + includeConfig 'conf/singularity.config' + includeConfig 'conf/resources.config' + includeConfig 'conf/containers.config' + } + // Default profile for BTB server + // Runs the pipeline locally // Singularity images need to be set up - slurm { + btb { includeConfig 'conf/base.config' - includeConfig 'conf/uppmax-slurm.config' + includeConfig 'conf/igenomes.config' + includeConfig 'conf/munin.config' includeConfig 'conf/singularity-path.config' } - // slurm profile for UPPMAX clusters - // Runs the pipeline using the job scheduler - // Singularity images will be pulled automatically - slurmDownload { + // Default config for CFC cluster in Tuebingen/Germany + cfc { includeConfig 'conf/base.config' - includeConfig 'conf/uppmax-slurm.config' + includeConfig 'conf/genomes.config' + includeConfig 'conf/cfc.config' includeConfig 'conf/singularity.config' + includeConfig 'conf/resources.config' includeConfig 'conf/containers.config' } // Small testing with Docker profile // Docker images will be pulled automatically docker { includeConfig 'conf/base.config' + includeConfig 'conf/genomes.config' includeConfig 'conf/travis.config' includeConfig 'conf/docker.config' includeConfig 'conf/containers.config' } - // AWS Batch with Docker profile - // Docker images will be pulled automatically - awsbatch { - includeConfig 'conf/base.config' - includeConfig 'conf/aws-batch.config' - includeConfig 'conf/docker.config' - includeConfig 'conf/containers.config' - } // Small testing with Singularity profile // Singularity images will be pulled automatically singularity { includeConfig 'conf/base.config' + includeConfig 'conf/genomes.config' includeConfig 'conf/travis.config' includeConfig 'conf/singularity.config' includeConfig 'conf/containers.config' @@ -76,17 +84,70 @@ profiles { // Singularity images need to be set up singularityPath { includeConfig 'conf/base.config' + includeConfig 'conf/genomes.config' includeConfig 'conf/travis.config' includeConfig 
'conf/singularity-path.config' } - - // Default config for german BinAC cluster - // Runs the pipeline using the pbs executor + // slurm profile for UPPMAX secure clusters + // Runs the pipeline using the job scheduler + // Singularity images need to be set up + slurm { + includeConfig 'conf/base.config' + includeConfig 'conf/genomes.config' + includeConfig 'conf/uppmax-slurm.config' + includeConfig 'conf/singularity-path.config' + } + // slurm profile for UPPMAX clusters + // Runs the pipeline using the job scheduler // Singularity images will be pulled automatically - binac { + slurmDownload { includeConfig 'conf/base.config' - includeConfig 'conf/binac.config' + includeConfig 'conf/genomes.config' + includeConfig 'conf/uppmax-slurm.config' includeConfig 'conf/singularity.config' includeConfig 'conf/containers.config' } + // Default profile for UPPMAX secure clusters + // Runs the pipeline locally on a single 16-core node + // Singularity images need to be set up + standard { + includeConfig 'conf/base.config' + includeConfig 'conf/genomes.config' + includeConfig 'conf/uppmax-localhost.config' + includeConfig 'conf/singularity-path.config' + } +} + + +// Function to ensure that resource requirements don't go beyond +// a maximum limit +def check_max(obj, type) { + if(type == 'memory'){ + try { + if(obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if(type == 'time'){ + try { + if(obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" + return obj + } + } else if(type == 'cpus'){ + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } } diff --git a/runMultiQC.nf b/runMultiQC.nf index 497e1f7505..fc6e0dd704 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -37,6 +37,12 @@ if (params.help) exit 0, helpMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " +// Check for awsbatch profile configuration +// make sure queue is defined +if (workflow.profile == 'awsbatch') { + if(!params.awsqueue) exit 1, "Provide the job queue for aws batch!" +} + directoryMap = SarekUtils.defineDirectoryMap(params.outDir) /* ================================================================================ @@ -47,7 +53,7 @@ directoryMap = SarekUtils.defineDirectoryMap(params.outDir) startMessage() process GetVersionAll { - publishDir directoryMap.multiQC, mode: 'link' + publishDir directoryMap.multiQC, mode: params.publishDirMode input: file(versions) from Channel.fromPath("${directoryMap.version}/*").collect().ifEmpty(file ("empty")) @@ -94,7 +100,7 @@ reportsForMultiQC = Channel.empty() ).collect() process RunMultiQC { - publishDir directoryMap.multiQC, mode: 'link' + publishDir directoryMap.multiQC, mode: params.publishDirMode input: file (multiqcConfig) from createMultiQCconfig() diff --git a/scripts/containers.sh b/scripts/containers.sh index da6c2fd1ba..78dd9ae5ac 100755 --- a/scripts/containers.sh +++ b/scripts/containers.sh @@ -33,12 +33,9 @@ then if [[ $TEST = ANNOTATEVEP ]] then docker pull maxulysse/vepgrch37:latest - else + elif [[ $TEST = ANNOTATESNPEFF ]] + then docker pull maxulysse/snpeffgrch37:latest fi -fi - -if [[ $TEST = ANNOTATESNPEFF ]] && [[ $PROFILE = singularity ]] && [[ $TRAVIS == true ]] -then - 
singularity build $TMPDIR/maxulysse-snpeffgrch37-latest.simg docker://maxulysse/snpeffgrch37:latest + docker pull maxulysse/sarek:latest fi diff --git a/scripts/do_release.sh b/scripts/do_release.sh index 64f0220bcd..ca3a771aae 100755 --- a/scripts/do_release.sh +++ b/scripts/do_release.sh @@ -39,6 +39,7 @@ sed -i "s/\[Unreleased\]/[$RELEASE] - $CODENAME - $(date +'%Y-%m-%d')/g" CHANGEL sed -i "s/sarek-[0-9\.]\+/sarek-$RELEASE/g" Dockerfile sed -i "s/sarek-[0-9\.]\+/sarek-$RELEASE/g" environment.yml sed -i "s/sarek-[0-9\.]\+/sarek-$RELEASE/g" Singularity +sed -i "s/VERSION [0-9\.]\+/VERSION $RELEASE/g" Singularity sed -i "s/version = '[0-9\.]\+'/version = '$RELEASE'/g" nextflow.config git commit CHANGELOG.md Dockerfile environment.yml Singularity nextflow.config -m "preparing release $RELEASE [skip ci]" diff --git a/somaticVC.nf b/somaticVC.nf index ab77e879b4..03553ca698 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -26,8 +26,6 @@ kate: syntax groovy; space-indent on; indent-width 2; https://github.com/SciLifeLab/Sarek/README.md -------------------------------------------------------------------------------- Processes overview - - RunSamtoolsStats - Run Samtools stats on recalibrated BAM files - - RunBamQC - Run qualimap BamQC on recalibrated BAM files - CreateIntervalBeds - Create and sort intervals into bed files - RunMutect2 - Run MuTect2 for Variant Calling (Parallelized processes) - RunFreeBayes - Run FreeBayes for Variant Calling (Parallelized processes) @@ -52,6 +50,12 @@ if (params.help) exit 0, helpMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " +// Check for awsbatch profile configuration +// make sure queue is defined +if (workflow.profile == 'awsbatch') { + if(!params.awsqueue) exit 1, "Provide the job queue for aws batch!" +} + tools = params.tools ? 
params.tools.split(',').collect{it.trim().toLowerCase()} : [] directoryMap = SarekUtils.defineDirectoryMap(params.outDir) @@ -98,7 +102,7 @@ if (params.verbose) bamFiles = bamFiles.view { } // assume input is recalibrated, ignore explicitBqsrNeeded -(bamForBamQC, bamForSamToolsStats, recalibratedBam, recalTables) = bamFiles.into(4) +(recalibratedBam, recalTables) = bamFiles.into(2) recalTables = recalTables.map{ it + [null] } // null recalibration table means: do not use --BQSR @@ -110,48 +114,6 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { Files : [${it[3].fileName}, ${it[4].fileName}]" } -process RunSamtoolsStats { - tag {idPatient + "-" + idSample} - - publishDir directoryMap.samtoolsStats, mode: 'link' - - input: - set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats - - output: - file ("${bam}.samtools.stats.out") into samtoolsStatsReport - - when: !params.noReports - - script: QC.samtoolsStats(bam) -} - -if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { - "SAMTools stats report:\n\ - File : [${it.fileName}]" -} - -process RunBamQC { - tag {idPatient + "-" + idSample} - - publishDir directoryMap.bamQC, mode: 'link' - - input: - set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC - - output: - file(idSample) into bamQCreport - - when: !params.noReports && !params.noBAMQC - - script: QC.bamQC(bam,idSample,task.memory) -} - -if (params.verbose) bamQCreport = bamQCreport.view { - "BamQC report:\n\ - Dir : [${it.fileName}]" -} - // Here we have a recalibrated bam set, but we need to separate the bam files based on patient status. // The sample tsv config file which is formatted like: "subject status sample lane fastq1 fastq2" // cf fastqFiles channel, I decided just to add _status to the sample name to have less changes to do. 
@@ -279,14 +241,12 @@ process RunMutect2 { input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from bamsFMT2 - set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex), file(cosmic), file(cosmicIndex) from Channel.value([ + set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex) from Channel.value([ referenceMap.genomeFile, referenceMap.genomeIndex, referenceMap.genomeDict, referenceMap.dbsnp, - referenceMap.dbsnpIndex, - referenceMap.cosmic, - referenceMap.cosmicIndex + referenceMap.dbsnpIndex ]) output: @@ -357,7 +317,7 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "_" + idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${directoryMap."$variantCaller"}", mode: 'link' + publishDir "${directoryMap."$variantCaller"}", mode: params.publishDirMode input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -392,7 +352,7 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunStrelka { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.strelka, mode: 'link' + publishDir directoryMap.strelka, mode: params.publishDirMode input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForStrelka @@ -449,7 +409,7 @@ if (params.verbose) strelkaOutput = strelkaOutput.view { process RunManta { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.manta, mode: 'link' + publishDir directoryMap.manta, mode: params.publishDirMode input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForManta @@ -503,7 +463,7 @@ if (params.verbose) mantaOutput = mantaOutput.view { process RunSingleManta { tag {idSample + " - Tumor-Only"} - publishDir 
directoryMap.manta, mode: 'link' + publishDir directoryMap.manta, mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -562,7 +522,7 @@ bamsForStrelkaBP = bamsForStrelkaBP.map { process RunStrelkaBP { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.strelkabp, mode: 'link' + publishDir directoryMap.strelkabp, mode: params.publishDirMode input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(mantaCSI), file(mantaCSIi) from bamsForStrelkaBP @@ -654,7 +614,7 @@ alleleCountOutput = alleleCountOutput.map { process RunConvertAlleleCounts { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.ascat, mode: 'link' + publishDir directoryMap.ascat, mode: params.publishDirMode input: set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCountOutput @@ -676,7 +636,7 @@ process RunConvertAlleleCounts { process RunAscat { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.ascat, mode: 'link' + publishDir directoryMap.ascat, mode: params.publishDirMode input: set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOutput @@ -734,7 +694,7 @@ vcfForQC = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir directoryMap.bcftoolsStats, mode: 'link' + publishDir directoryMap.bcftoolsStats, mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForBCFtools @@ -757,7 +717,7 @@ bcfReport.close() process RunVcftools { tag {vcf} - publishDir directoryMap.vcftools, mode: 'link' + publishDir directoryMap.vcftools, mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForVCFtools @@ -778,7 +738,7 @@ if (params.verbose) vcfReport = vcfReport.view { vcfReport.close() process GetVersionAlleleCount { - publishDir 
directoryMap.version, mode: 'link' + publishDir directoryMap.version, mode: params.publishDirMode output: file("v_*.txt") when: 'ascat' in tools && !params.onlyQC @@ -789,7 +749,7 @@ process GetVersionAlleleCount { } process GetVersionASCAT { - publishDir directoryMap.version, mode: 'link' + publishDir directoryMap.version, mode: params.publishDirMode output: file("v_*.txt") when: 'ascat' in tools && !params.onlyQC @@ -832,9 +792,6 @@ def defineReferenceMap() { 'acLoci' : checkParamReturnFile("acLoci"), 'dbsnp' : checkParamReturnFile("dbsnp"), 'dbsnpIndex' : checkParamReturnFile("dbsnpIndex"), - // cosmic VCF with VCF4.1 header - 'cosmic' : checkParamReturnFile("cosmic"), - 'cosmicIndex' : checkParamReturnFile("cosmicIndex"), // genome reference dictionary 'genomeDict' : checkParamReturnFile("genomeDict"), // FASTA genome reference @@ -923,8 +880,6 @@ def minimalInformationMessage() { log.info " Tag : " + params.tag log.info "Reference files used:" log.info " acLoci :\n\t" + referenceMap.acLoci - log.info " cosmic :\n\t" + referenceMap.cosmic - log.info "\t" + referenceMap.cosmicIndex log.info " dbsnp :\n\t" + referenceMap.dbsnp log.info "\t" + referenceMap.dbsnpIndex log.info " genome :\n\t" + referenceMap.genomeFile